summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOran Agra <oran@redislabs.com>2015-07-21 09:44:16 +0300
committerOran Agra <oran@redislabs.com>2015-07-21 09:44:16 +0300
commit9e67df2a39a37386cb38701f287857fd95c31527 (patch)
treef2535d87455a9f28251d617db62758ebd9766cb6
parentf472bb106982f2dee101c70886a47992b1d0896a (diff)
parentbcb4d091233a98b025aac18b3a7e4966356323eb (diff)
downloadredis-9e67df2a39a37386cb38701f287857fd95c31527.tar.gz
rebase from unstable
-rw-r--r--COPYING2
-rw-r--r--README.md6
-rw-r--r--deps/Makefile7
-rw-r--r--deps/geohash-int/Makefile23
-rw-r--r--deps/geohash-int/geohash.c295
-rw-r--r--deps/geohash-int/geohash.h118
-rw-r--r--deps/geohash-int/geohash_helper.c195
-rw-r--r--deps/geohash-int/geohash_helper.h71
-rw-r--r--deps/hiredis/async.c1
-rw-r--r--deps/lua/src/ldo.c2
-rw-r--r--deps/lua/src/lua_cmsgpack.c71
-rw-r--r--redis.conf13
-rw-r--r--src/Makefile9
-rw-r--r--src/anet.c30
-rw-r--r--src/anet.h1
-rw-r--r--src/aof.c2
-rw-r--r--src/bio.c7
-rw-r--r--src/blocked.c46
-rw-r--r--src/cluster.c453
-rw-r--r--src/cluster.h8
-rw-r--r--src/config.c686
-rw-r--r--src/db.c14
-rw-r--r--src/debug.c21
-rw-r--r--src/dict.c142
-rw-r--r--src/dict.h2
-rw-r--r--src/geo.c706
-rw-r--r--src/geo.h22
-rw-r--r--src/latency.c2
-rw-r--r--src/networking.c85
-rw-r--r--src/object.c6
-rw-r--r--src/rdb.c2
-rw-r--r--src/redis-cli.c6
-rw-r--r--src/redis.c90
-rw-r--r--src/redis.h29
-rw-r--r--src/replication.c10
-rw-r--r--src/scripting.c22
-rw-r--r--src/sds.c7
-rw-r--r--src/sentinel.c887
-rw-r--r--src/sha1.c14
-rw-r--r--src/sha1.h8
-rw-r--r--src/sort.c2
-rw-r--r--src/t_set.c20
-rw-r--r--src/t_zset.c141
-rw-r--r--src/ziplist.h5
-rw-r--r--tests/cluster/run.tcl1
-rw-r--r--tests/cluster/tests/10-manual-failover.tcl192
-rw-r--r--tests/cluster/tests/11-manual-takeover.tcl59
-rw-r--r--tests/instances.tcl14
-rw-r--r--tests/integration/replication-4.tcl2
-rw-r--r--tests/integration/replication.tcl52
-rw-r--r--tests/sentinel/run.tcl1
-rw-r--r--tests/sentinel/tests/06-ckquorum.tcl34
-rw-r--r--tests/support/server.tcl7
-rw-r--r--tests/unit/geo.tcl158
-rw-r--r--tests/unit/type/set.tcl1
-rw-r--r--tests/unit/type/zset.tcl80
-rw-r--r--utils/create-cluster/.gitignore4
-rwxr-xr-xutils/create-cluster/create-cluster4
-rwxr-xr-xutils/releasetools/03_test_release.sh (renamed from utils/releasetools/00_test_release.sh)0
-rwxr-xr-xutils/releasetools/04_release_hash.sh (renamed from utils/releasetools/03_release_hash.sh)0
60 files changed, 3773 insertions, 1125 deletions
diff --git a/COPYING b/COPYING
index a58de44dd..ac68e012b 100644
--- a/COPYING
+++ b/COPYING
@@ -1,4 +1,4 @@
-Copyright (c) 2006-2014, Salvatore Sanfilippo
+Copyright (c) 2006-2015, Salvatore Sanfilippo
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
diff --git a/README.md b/README.md
index 0565418f6..223ef13c6 100644
--- a/README.md
+++ b/README.md
@@ -3,15 +3,15 @@ This README is just a fast *quick start* document. You can find more detailed do
What is Redis?
--------------
-Redis is often referred as a *data structures* server. What this means is that Redis provides access to mutable data structures via a set of commands, which are send using a *server-client* model with TCP sockets and a simple protocol. So different processes can query and modify the same data structures in a shared way.
+Redis is often referred as a *data structures* server. What this means is that Redis provides access to mutable data structures via a set of commands, which are sent using a *server-client* model with TCP sockets and a simple protocol. So different processes can query and modify the same data structures in a shared way.
Data structures implemented into Redis have a few special properties:
* Redis cares to store them on disk, even if they are always served and modified into the server memory. This means that Redis is fast, but that is also non-volatile.
* Implementation of data structures stress on memory efficiency, so data structures inside Redis will likely use less memory compared to the same data structure modeled using an high level programming language.
-* Redis offers a number of features that are natural to find into a database, like replication, tunable levels of durability, cluster, high availability.
+* Redis offers a number of features that are natural to find in a database, like replication, tunable levels of durability, cluster, high availability.
-Another good example is to think at Redis as a more complex version of memcached, where the opeations are not just SETs and GETs, but operations to work with complex data types like Lists, Sets, ordered data structures, and so forth.
+Another good example is to think of Redis as a more complex version of memcached, where the operations are not just SETs and GETs, but operations to work with complex data types like Lists, Sets, ordered data structures, and so forth.
If you want to know more, this is a list of selected starting points:
diff --git a/deps/Makefile b/deps/Makefile
index 71f6d3a2c..10ae6e790 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -36,6 +36,7 @@ distclean:
-(cd hiredis && $(MAKE) clean) > /dev/null || true
-(cd linenoise && $(MAKE) clean) > /dev/null || true
-(cd lua && $(MAKE) clean) > /dev/null || true
+ -(cd geohash-int && $(MAKE) clean) > /dev/null || true
-(cd jemalloc && [ -f Makefile ] && $(MAKE) distclean) > /dev/null || true
-(rm -f .make-*)
@@ -81,3 +82,9 @@ jemalloc: .make-prerequisites
cd jemalloc && $(MAKE) CFLAGS="$(JEMALLOC_CFLAGS)" LDFLAGS="$(JEMALLOC_LDFLAGS)" lib/libjemalloc.a
.PHONY: jemalloc
+
+geohash-int: .make-prerequisites
+ @printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
+ cd geohash-int && $(MAKE)
+
+.PHONY: geohash-int
diff --git a/deps/geohash-int/Makefile b/deps/geohash-int/Makefile
new file mode 100644
index 000000000..b7c259577
--- /dev/null
+++ b/deps/geohash-int/Makefile
@@ -0,0 +1,23 @@
+STD=
+WARN= -Wall
+OPT= -O2
+
+R_CFLAGS= $(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS)
+R_LDFLAGS= $(LDFLAGS)
+DEBUG= -g
+
+R_CC=$(CC) $(R_CFLAGS)
+R_LD=$(CC) $(R_LDFLAGS)
+
+all: geohash.o geohash_helper.o
+
+.PHONY: all
+
+geohash.o: geohash.h geohash.c
+geohash_helper.o: geohash.h geohash_helper.h geohash_helper.c
+
+.c.o:
+ $(R_CC) -c $<
+
+clean:
+ rm -f *.o
diff --git a/deps/geohash-int/geohash.c b/deps/geohash-int/geohash.c
new file mode 100644
index 000000000..d3bc7de25
--- /dev/null
+++ b/deps/geohash-int/geohash.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "geohash.h"
+
+/**
+ * Hashing works like this:
+ * Divide the world into 4 buckets. Label each one as such:
+ * -----------------
+ * | | |
+ * | | |
+ * | 0,1 | 1,1 |
+ * -----------------
+ * | | |
+ * | | |
+ * | 0,0 | 1,0 |
+ * -----------------
+ */
+
+/* Interleave lower bits of x and y, so the bits of x
+ * are in the even positions and bits from y in the odd;
+ * x and y must initially be less than 2**32 (65536).
+ * From: https://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
+ */
+static inline uint64_t interleave64(uint32_t xlo, uint32_t ylo) {
+ static const uint64_t B[] = {0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL};
+ static const unsigned int S[] = {1, 2, 4, 8, 16};
+
+ uint64_t x = xlo;
+ uint64_t y = ylo;
+
+ x = (x | (x << S[4])) & B[4];
+ y = (y | (y << S[4])) & B[4];
+
+ x = (x | (x << S[3])) & B[3];
+ y = (y | (y << S[3])) & B[3];
+
+ x = (x | (x << S[2])) & B[2];
+ y = (y | (y << S[2])) & B[2];
+
+ x = (x | (x << S[1])) & B[1];
+ y = (y | (y << S[1])) & B[1];
+
+ x = (x | (x << S[0])) & B[0];
+ y = (y | (y << S[0])) & B[0];
+
+ return x | (y << 1);
+}
+
+/* reverse the interleave process
+ * derived from http://stackoverflow.com/questions/4909263
+ */
+static inline uint64_t deinterleave64(uint64_t interleaved) {
+ static const uint64_t B[] = {0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
+ static const unsigned int S[] = {0, 1, 2, 4, 8, 16};
+
+ uint64_t x = interleaved;
+ uint64_t y = interleaved >> 1;
+
+ x = (x | (x >> S[0])) & B[0];
+ y = (y | (y >> S[0])) & B[0];
+
+ x = (x | (x >> S[1])) & B[1];
+ y = (y | (y >> S[1])) & B[1];
+
+ x = (x | (x >> S[2])) & B[2];
+ y = (y | (y >> S[2])) & B[2];
+
+ x = (x | (x >> S[3])) & B[3];
+ y = (y | (y >> S[3])) & B[3];
+
+ x = (x | (x >> S[4])) & B[4];
+ y = (y | (y >> S[4])) & B[4];
+
+ x = (x | (x >> S[5])) & B[5];
+ y = (y | (y >> S[5])) & B[5];
+
+ return x | (y << 32);
+}
+
+void geohashGetCoordRange(GeoHashRange *long_range, GeoHashRange *lat_range) {
+ /* These are constraints from EPSG:900913 / EPSG:3785 / OSGEO:41001 */
+ /* We can't geocode at the north/south pole. */
+ long_range->max = GEO_LONG_MAX;
+ long_range->min = GEO_LONG_MIN;
+ lat_range->max = GEO_LAT_MAX;
+ lat_range->min = GEO_LAT_MIN;
+}
+
+int geohashEncode(GeoHashRange *long_range, GeoHashRange *lat_range,
+ double longitude, double latitude, uint8_t step,
+ GeoHashBits *hash) {
+ /* Check basic arguments sanity. */
+ if (hash == NULL || step > 32 || step == 0 ||
+ RANGEPISZERO(lat_range) || RANGEPISZERO(long_range)) return 0;
+
+ /* Return an error when trying to index outside the supported
+ * constraints. */
+ if (longitude > 180 || longitude < -180 ||
+ latitude > 85.05112878 || latitude < -85.05112878) return 0;
+
+ hash->bits = 0;
+ hash->step = step;
+
+ if (latitude < lat_range->min || latitude > lat_range->max ||
+ longitude < long_range->min || longitude > long_range->max) {
+ return 0;
+ }
+
+ double lat_offset =
+ (latitude - lat_range->min) / (lat_range->max - lat_range->min);
+ double long_offset =
+ (longitude - long_range->min) / (long_range->max - long_range->min);
+
+ /* convert to fixed point based on the step size */
+ lat_offset *= (1 << step);
+ long_offset *= (1 << step);
+ hash->bits = interleave64(lat_offset, long_offset);
+ return 1;
+}
+
+int geohashEncodeType(double longitude, double latitude, uint8_t step, GeoHashBits *hash) {
+ GeoHashRange r[2] = { { 0 } };
+ geohashGetCoordRange(&r[0], &r[1]);
+ return geohashEncode(&r[0], &r[1], longitude, latitude, step, hash);
+}
+
+int geohashEncodeWGS84(double longitude, double latitude, uint8_t step,
+ GeoHashBits *hash) {
+ return geohashEncodeType(longitude, latitude, step, hash);
+}
+
+int geohashDecode(const GeoHashRange long_range, const GeoHashRange lat_range,
+ const GeoHashBits hash, GeoHashArea *area) {
+ if (HASHISZERO(hash) || NULL == area || RANGEISZERO(lat_range) ||
+ RANGEISZERO(long_range)) {
+ return 0;
+ }
+
+ area->hash = hash;
+ uint8_t step = hash.step;
+ uint64_t hash_sep = deinterleave64(hash.bits); /* hash = [LAT][LONG] */
+
+ double lat_scale = lat_range.max - lat_range.min;
+ double long_scale = long_range.max - long_range.min;
+
+ uint32_t ilato = hash_sep; /* get lat part of deinterleaved hash */
+ uint32_t ilono = hash_sep >> 32; /* shift over to get long part of hash */
+
+ /* divide by 2**step.
+ * Then, for 0-1 coordinate, multiply times scale and add
+ to the min to get the absolute coordinate. */
+ area->latitude.min =
+ lat_range.min + (ilato * 1.0 / (1ull << step)) * lat_scale;
+ area->latitude.max =
+ lat_range.min + ((ilato + 1) * 1.0 / (1ull << step)) * lat_scale;
+ area->longitude.min =
+ long_range.min + (ilono * 1.0 / (1ull << step)) * long_scale;
+ area->longitude.max =
+ long_range.min + ((ilono + 1) * 1.0 / (1ull << step)) * long_scale;
+
+ return 1;
+}
+
+int geohashDecodeType(const GeoHashBits hash, GeoHashArea *area) {
+ GeoHashRange r[2] = { { 0 } };
+ geohashGetCoordRange(&r[0], &r[1]);
+ return geohashDecode(r[0], r[1], hash, area);
+}
+
+int geohashDecodeWGS84(const GeoHashBits hash, GeoHashArea *area) {
+ return geohashDecodeType(hash, area);
+}
+
+int geohashDecodeAreaToLongLat(const GeoHashArea *area, double *xy) {
+ if (!xy) return 0;
+ xy[0] = (area->longitude.min + area->longitude.max) / 2;
+ xy[1] = (area->latitude.min + area->latitude.max) / 2;
+ return 1;
+}
+
+int geohashDecodeToLongLatType(const GeoHashBits hash, double *xy) {
+ GeoHashArea area = { { 0 } };
+ if (!xy || !geohashDecodeType(hash, &area))
+ return 0;
+ return geohashDecodeAreaToLongLat(&area, xy);
+}
+
+int geohashDecodeToLongLatWGS84(const GeoHashBits hash, double *xy) {
+ return geohashDecodeToLongLatType(hash, xy);
+}
+
+static void geohash_move_x(GeoHashBits *hash, int8_t d) {
+ if (d == 0)
+ return;
+
+ uint64_t x = hash->bits & 0xaaaaaaaaaaaaaaaaULL;
+ uint64_t y = hash->bits & 0x5555555555555555ULL;
+
+ uint64_t zz = 0x5555555555555555ULL >> (64 - hash->step * 2);
+
+ if (d > 0) {
+ x = x + (zz + 1);
+ } else {
+ x = x | zz;
+ x = x - (zz + 1);
+ }
+
+ x &= (0xaaaaaaaaaaaaaaaaULL >> (64 - hash->step * 2));
+ hash->bits = (x | y);
+}
+
+static void geohash_move_y(GeoHashBits *hash, int8_t d) {
+ if (d == 0)
+ return;
+
+ uint64_t x = hash->bits & 0xaaaaaaaaaaaaaaaaULL;
+ uint64_t y = hash->bits & 0x5555555555555555ULL;
+
+ uint64_t zz = 0xaaaaaaaaaaaaaaaaULL >> (64 - hash->step * 2);
+ if (d > 0) {
+ y = y + (zz + 1);
+ } else {
+ y = y | zz;
+ y = y - (zz + 1);
+ }
+ y &= (0x5555555555555555ULL >> (64 - hash->step * 2));
+ hash->bits = (x | y);
+}
+
+void geohashNeighbors(const GeoHashBits *hash, GeoHashNeighbors *neighbors) {
+ neighbors->east = *hash;
+ neighbors->west = *hash;
+ neighbors->north = *hash;
+ neighbors->south = *hash;
+ neighbors->south_east = *hash;
+ neighbors->south_west = *hash;
+ neighbors->north_east = *hash;
+ neighbors->north_west = *hash;
+
+ geohash_move_x(&neighbors->east, 1);
+ geohash_move_y(&neighbors->east, 0);
+
+ geohash_move_x(&neighbors->west, -1);
+ geohash_move_y(&neighbors->west, 0);
+
+ geohash_move_x(&neighbors->south, 0);
+ geohash_move_y(&neighbors->south, -1);
+
+ geohash_move_x(&neighbors->north, 0);
+ geohash_move_y(&neighbors->north, 1);
+
+ geohash_move_x(&neighbors->north_west, -1);
+ geohash_move_y(&neighbors->north_west, 1);
+
+ geohash_move_x(&neighbors->north_east, 1);
+ geohash_move_y(&neighbors->north_east, 1);
+
+ geohash_move_x(&neighbors->south_east, 1);
+ geohash_move_y(&neighbors->south_east, -1);
+
+ geohash_move_x(&neighbors->south_west, -1);
+ geohash_move_y(&neighbors->south_west, -1);
+}
diff --git a/deps/geohash-int/geohash.h b/deps/geohash-int/geohash.h
new file mode 100644
index 000000000..c2f57bed0
--- /dev/null
+++ b/deps/geohash-int/geohash.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GEOHASH_H_
+#define GEOHASH_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define HASHISZERO(r) (!(r).bits && !(r).step)
+#define RANGEISZERO(r) (!(r).max && !(r).min)
+#define RANGEPISZERO(r) (r == NULL || RANGEISZERO(*r))
+
+#define GEO_STEP_MAX 26 /* 26*2 = 52 bits. */
+
+/* Limits from EPSG:900913 / EPSG:3785 / OSGEO:41001 */
+#define GEO_LAT_MIN -85.05112878
+#define GEO_LAT_MAX 85.05112878
+#define GEO_LONG_MIN -180
+#define GEO_LONG_MAX 180
+
+typedef enum {
+ GEOHASH_NORTH = 0,
+ GEOHASH_EAST,
+ GEOHASH_WEST,
+ GEOHASH_SOUTH,
+ GEOHASH_SOUTH_WEST,
+ GEOHASH_SOUTH_EAST,
+ GEOHASH_NORT_WEST,
+ GEOHASH_NORT_EAST
+} GeoDirection;
+
+typedef struct {
+ uint64_t bits;
+ uint8_t step;
+} GeoHashBits;
+
+typedef struct {
+ double min;
+ double max;
+} GeoHashRange;
+
+typedef struct {
+ GeoHashBits hash;
+ GeoHashRange longitude;
+ GeoHashRange latitude;
+} GeoHashArea;
+
+typedef struct {
+ GeoHashBits north;
+ GeoHashBits east;
+ GeoHashBits west;
+ GeoHashBits south;
+ GeoHashBits north_east;
+ GeoHashBits south_east;
+ GeoHashBits north_west;
+ GeoHashBits south_west;
+} GeoHashNeighbors;
+
+/*
+ * 0:success
+ * -1:failed
+ */
+void geohashGetCoordRange(GeoHashRange *long_range, GeoHashRange *lat_range);
+int geohashEncode(GeoHashRange *long_range, GeoHashRange *lat_range,
+ double longitude, double latitude, uint8_t step,
+ GeoHashBits *hash);
+int geohashEncodeType(double longitude, double latitude,
+ uint8_t step, GeoHashBits *hash);
+int geohashEncodeWGS84(double longitude, double latitude, uint8_t step,
+ GeoHashBits *hash);
+int geohashDecode(const GeoHashRange long_range, const GeoHashRange lat_range,
+ const GeoHashBits hash, GeoHashArea *area);
+int geohashDecodeType(const GeoHashBits hash, GeoHashArea *area);
+int geohashDecodeWGS84(const GeoHashBits hash, GeoHashArea *area);
+int geohashDecodeAreaToLongLat(const GeoHashArea *area, double *xy);
+int geohashDecodeToLongLatType(const GeoHashBits hash, double *xy);
+int geohashDecodeToLongLatWGS84(const GeoHashBits hash, double *xy);
+int geohashDecodeToLongLatMercator(const GeoHashBits hash, double *xy);
+void geohashNeighbors(const GeoHashBits *hash, GeoHashNeighbors *neighbors);
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* GEOHASH_H_ */
diff --git a/deps/geohash-int/geohash_helper.c b/deps/geohash-int/geohash_helper.c
new file mode 100644
index 000000000..4c3762faf
--- /dev/null
+++ b/deps/geohash-int/geohash_helper.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This is a C++ to C conversion from the ardb project.
+ * This file started out as:
+ * https://github.com/yinqiwen/ardb/blob/d42503/src/geo/geohash_helper.cpp
+ */
+
+#include "geohash_helper.h"
+
+#define D_R (M_PI / 180.0)
+#define R_MAJOR 6378137.0
+#define R_MINOR 6356752.3142
+#define RATIO (R_MINOR / R_MAJOR)
+#define ECCENT (sqrt(1.0 - (RATIO *RATIO)))
+#define COM (0.5 * ECCENT)
+
+/// @brief The usual PI/180 constant
+const double DEG_TO_RAD = 0.017453292519943295769236907684886;
+/// @brief Earth's quatratic mean radius for WGS-84
+const double EARTH_RADIUS_IN_METERS = 6372797.560856;
+
+const double MERCATOR_MAX = 20037726.37;
+const double MERCATOR_MIN = -20037726.37;
+
+static inline double deg_rad(double ang) { return ang * D_R; }
+static inline double rad_deg(double ang) { return ang / D_R; }
+
+/* You must *ONLY* estimate steps when you are encoding.
+ * If you are decoding, always decode to GEO_STEP_MAX (26). */
+uint8_t geohashEstimateStepsByRadius(double range_meters, double lat) {
+ if (range_meters == 0) return 26;
+ int step = 1;
+ while (range_meters < MERCATOR_MAX) {
+ range_meters *= 2;
+ step++;
+ }
+ step -= 2; /* Make sure range is included in the worst case. */
+ /* Wider range torwards the poles... Note: it is possible to do better
+ * than this approximation by computing the distance between meridians
+ * at this latitude, but this does the trick for now. */
+ if (lat > 67 || lat < -67) step--;
+ if (lat > 80 || lat < -80) step--;
+
+ /* Frame to valid range. */
+ if (step < 1) step = 1;
+ if (step > 26) step = 25;
+ return step;
+}
+
+int geohashBitsComparator(const GeoHashBits *a, const GeoHashBits *b) {
+ /* If step not equal, compare on step. Else, compare on bits. */
+ return a->step != b->step ? a->step - b->step : a->bits - b->bits;
+}
+
+int geohashBoundingBox(double longitude, double latitude, double radius_meters,
+ double *bounds) {
+ if (!bounds) return 0;
+
+ double lonr, latr;
+ lonr = deg_rad(longitude);
+ latr = deg_rad(latitude);
+
+ double distance = radius_meters / EARTH_RADIUS_IN_METERS;
+ double min_latitude = latr - distance;
+ double max_latitude = latr + distance;
+
+ /* Note: we're being lazy and not accounting for coordinates near poles */
+ double min_longitude, max_longitude;
+ double difference_longitude = asin(sin(distance) / cos(latr));
+ min_longitude = lonr - difference_longitude;
+ max_longitude = lonr + difference_longitude;
+
+ bounds[0] = rad_deg(min_longitude);
+ bounds[1] = rad_deg(min_latitude);
+ bounds[2] = rad_deg(max_longitude);
+ bounds[3] = rad_deg(max_latitude);
+ return 1;
+}
+
+GeoHashRadius geohashGetAreasByRadius(double longitude, double latitude, double radius_meters) {
+ GeoHashRange long_range, lat_range;
+ GeoHashRadius radius = { { 0 } };
+ GeoHashBits hash = { 0 };
+ GeoHashNeighbors neighbors = { { 0 } };
+ GeoHashArea area = { { 0 } };
+ double min_lon, max_lon, min_lat, max_lat;
+ double bounds[4];
+ int steps;
+
+ geohashBoundingBox(longitude, latitude, radius_meters, bounds);
+ min_lon = bounds[0];
+ min_lat = bounds[1];
+ max_lon = bounds[2];
+ max_lat = bounds[3];
+
+ steps = geohashEstimateStepsByRadius(radius_meters,latitude);
+
+ geohashGetCoordRange(&long_range, &lat_range);
+ geohashEncode(&long_range, &lat_range, longitude, latitude, steps, &hash);
+ geohashNeighbors(&hash, &neighbors);
+ geohashGetCoordRange(&long_range, &lat_range);
+ geohashDecode(long_range, lat_range, hash, &area);
+
+ if (area.latitude.min < min_lat) {
+ GZERO(neighbors.south);
+ GZERO(neighbors.south_west);
+ GZERO(neighbors.south_east);
+ }
+ if (area.latitude.max > max_lat) {
+ GZERO(neighbors.north);
+ GZERO(neighbors.north_east);
+ GZERO(neighbors.north_west);
+ }
+ if (area.longitude.min < min_lon) {
+ GZERO(neighbors.west);
+ GZERO(neighbors.south_west);
+ GZERO(neighbors.north_west);
+ }
+ if (area.longitude.max > max_lon) {
+ GZERO(neighbors.east);
+ GZERO(neighbors.south_east);
+ GZERO(neighbors.north_east);
+ }
+ radius.hash = hash;
+ radius.neighbors = neighbors;
+ radius.area = area;
+ return radius;
+}
+
+GeoHashRadius geohashGetAreasByRadiusWGS84(double longitude, double latitude,
+ double radius_meters) {
+ return geohashGetAreasByRadius(longitude, latitude, radius_meters);
+}
+
+GeoHashFix52Bits geohashAlign52Bits(const GeoHashBits hash) {
+ uint64_t bits = hash.bits;
+ bits <<= (52 - hash.step * 2);
+ return bits;
+}
+
+/* Calculate distance using haversin great circle distance formula. */
+double geohashGetDistance(double lon1d, double lat1d, double lon2d, double lat2d) {
+ double lat1r, lon1r, lat2r, lon2r, u, v;
+ lat1r = deg_rad(lat1d);
+ lon1r = deg_rad(lon1d);
+ lat2r = deg_rad(lat2d);
+ lon2r = deg_rad(lon2d);
+ u = sin((lat2r - lat1r) / 2);
+ v = sin((lon2r - lon1r) / 2);
+ return 2.0 * EARTH_RADIUS_IN_METERS *
+ asin(sqrt(u * u + cos(lat1r) * cos(lat2r) * v * v));
+}
+
+int geohashGetDistanceIfInRadius(double x1, double y1,
+ double x2, double y2, double radius,
+ double *distance) {
+ *distance = geohashGetDistance(x1, y1, x2, y2);
+ if (*distance > radius) return 0;
+ return 1;
+}
+
+int geohashGetDistanceIfInRadiusWGS84(double x1, double y1, double x2,
+ double y2, double radius,
+ double *distance) {
+ return geohashGetDistanceIfInRadius(x1, y1, x2, y2, radius, distance);
+}
diff --git a/deps/geohash-int/geohash_helper.h b/deps/geohash-int/geohash_helper.h
new file mode 100644
index 000000000..bff111dbe
--- /dev/null
+++ b/deps/geohash-int/geohash_helper.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GEOHASH_HELPER_HPP_
+#define GEOHASH_HELPER_HPP_
+
+#include <math.h>
+#include "geohash.h"
+
+#define GZERO(s) s.bits = s.step = 0;
+#define GISZERO(s) (!s.bits && !s.step)
+#define GISNOTZERO(s) (s.bits || s.step)
+
+typedef uint64_t GeoHashFix52Bits;
+typedef uint64_t GeoHashVarBits;
+
+typedef struct {
+ GeoHashBits hash;
+ GeoHashArea area;
+ GeoHashNeighbors neighbors;
+} GeoHashRadius;
+
+int GeoHashBitsComparator(const GeoHashBits *a, const GeoHashBits *b);
+uint8_t geohashEstimateStepsByRadius(double range_meters, double lat);
+int geohashBoundingBox(double longitude, double latitude, double radius_meters,
+ double *bounds);
+GeoHashRadius geohashGetAreasByRadius(double longitude,
+ double latitude, double radius_meters);
+GeoHashRadius geohashGetAreasByRadiusWGS84(double longitude, double latitude,
+ double radius_meters);
+GeoHashRadius geohashGetAreasByRadiusMercator(double longitude, double latitude,
+ double radius_meters);
+GeoHashFix52Bits geohashAlign52Bits(const GeoHashBits hash);
+double geohashGetDistance(double lon1d, double lat1d,
+ double lon2d, double lat2d);
+int geohashGetDistanceIfInRadius(double x1, double y1,
+ double x2, double y2, double radius,
+ double *distance);
+int geohashGetDistanceIfInRadiusWGS84(double x1, double y1, double x2,
+ double y2, double radius,
+ double *distance);
+
+#endif /* GEOHASH_HELPER_HPP_ */
diff --git a/deps/hiredis/async.c b/deps/hiredis/async.c
index f7f343bef..9cc35638f 100644
--- a/deps/hiredis/async.c
+++ b/deps/hiredis/async.c
@@ -443,6 +443,7 @@ void redisProcessCallbacks(redisAsyncContext *ac) {
if (((redisReply*)reply)->type == REDIS_REPLY_ERROR) {
c->err = REDIS_ERR_OTHER;
snprintf(c->errstr,sizeof(c->errstr),"%s",((redisReply*)reply)->str);
+ c->reader->fn->freeObject(reply);
__redisAsyncDisconnect(ac);
return;
}
diff --git a/deps/lua/src/ldo.c b/deps/lua/src/ldo.c
index d1bf786cb..514f7a2a3 100644
--- a/deps/lua/src/ldo.c
+++ b/deps/lua/src/ldo.c
@@ -495,7 +495,7 @@ static void f_parser (lua_State *L, void *ud) {
struct SParser *p = cast(struct SParser *, ud);
int c = luaZ_lookahead(p->z);
luaC_checkGC(L);
- tf = ((c == LUA_SIGNATURE[0]) ? luaU_undump : luaY_parser)(L, p->z,
+ tf = (luaY_parser)(L, p->z,
&p->buff, p->name);
cl = luaF_newLclosure(L, tf->nups, hvalue(gt(L)));
cl->l.p = tf;
diff --git a/deps/lua/src/lua_cmsgpack.c b/deps/lua/src/lua_cmsgpack.c
index e13f053d2..0b82d008d 100644
--- a/deps/lua/src/lua_cmsgpack.c
+++ b/deps/lua/src/lua_cmsgpack.c
@@ -66,7 +66,7 @@
/* Reverse memory bytes if arch is little endian. Given the conceptual
* simplicity of the Lua build system we prefer check for endianess at runtime.
* The performance difference should be acceptable. */
-static void memrevifle(void *ptr, size_t len) {
+void memrevifle(void *ptr, size_t len) {
unsigned char *p = (unsigned char *)ptr,
*e = (unsigned char *)p+len-1,
aux;
@@ -96,7 +96,7 @@ typedef struct mp_buf {
size_t len, free;
} mp_buf;
-static void *mp_realloc(lua_State *L, void *target, size_t osize,size_t nsize) {
+void *mp_realloc(lua_State *L, void *target, size_t osize,size_t nsize) {
void *(*local_realloc) (void *, void *, size_t osize, size_t nsize) = NULL;
void *ud;
@@ -105,7 +105,7 @@ static void *mp_realloc(lua_State *L, void *target, size_t osize,size_t nsize) {
return local_realloc(ud, target, osize, nsize);
}
-static mp_buf *mp_buf_new(lua_State *L) {
+mp_buf *mp_buf_new(lua_State *L) {
mp_buf *buf = NULL;
/* Old size = 0; new size = sizeof(*buf) */
@@ -117,7 +117,7 @@ static mp_buf *mp_buf_new(lua_State *L) {
return buf;
}
-static void mp_buf_append(mp_buf *buf, const unsigned char *s, size_t len) {
+void mp_buf_append(mp_buf *buf, const unsigned char *s, size_t len) {
if (buf->free < len) {
size_t newlen = buf->len+len;
@@ -153,7 +153,7 @@ typedef struct mp_cur {
int err;
} mp_cur;
-static void mp_cur_init(mp_cur *cursor, const unsigned char *s, size_t len) {
+void mp_cur_init(mp_cur *cursor, const unsigned char *s, size_t len) {
cursor->p = s;
cursor->left = len;
cursor->err = MP_CUR_ERROR_NONE;
@@ -173,13 +173,17 @@ static void mp_cur_init(mp_cur *cursor, const unsigned char *s, size_t len) {
/* ------------------------- Low level MP encoding -------------------------- */
-static void mp_encode_bytes(mp_buf *buf, const unsigned char *s, size_t len) {
+void mp_encode_bytes(mp_buf *buf, const unsigned char *s, size_t len) {
unsigned char hdr[5];
int hdrlen;
if (len < 32) {
hdr[0] = 0xa0 | (len&0xff); /* fix raw */
hdrlen = 1;
+ } else if (len <= 0xff) {
+ hdr[0] = 0xd9;
+ hdr[1] = len;
+ hdrlen = 2;
} else if (len <= 0xffff) {
hdr[0] = 0xda;
hdr[1] = (len&0xff00)>>8;
@@ -198,7 +202,7 @@ static void mp_encode_bytes(mp_buf *buf, const unsigned char *s, size_t len) {
}
/* we assume IEEE 754 internal format for single and double precision floats. */
-static void mp_encode_double(mp_buf *buf, double d) {
+void mp_encode_double(mp_buf *buf, double d) {
unsigned char b[9];
float f = d;
@@ -216,7 +220,7 @@ static void mp_encode_double(mp_buf *buf, double d) {
}
}
-static void mp_encode_int(mp_buf *buf, int64_t n) {
+void mp_encode_int(mp_buf *buf, int64_t n) {
unsigned char b[9];
int enclen;
@@ -288,7 +292,7 @@ static void mp_encode_int(mp_buf *buf, int64_t n) {
mp_buf_append(buf,b,enclen);
}
-static void mp_encode_array(mp_buf *buf, int64_t n) {
+void mp_encode_array(mp_buf *buf, int64_t n) {
unsigned char b[5];
int enclen;
@@ -311,7 +315,7 @@ static void mp_encode_array(mp_buf *buf, int64_t n) {
mp_buf_append(buf,b,enclen);
}
-static void mp_encode_map(mp_buf *buf, int64_t n) {
+void mp_encode_map(mp_buf *buf, int64_t n) {
unsigned char b[5];
int enclen;
@@ -336,7 +340,7 @@ static void mp_encode_map(mp_buf *buf, int64_t n) {
/* --------------------------- Lua types encoding --------------------------- */
-static void mp_encode_lua_string(lua_State *L, mp_buf *buf) {
+void mp_encode_lua_string(lua_State *L, mp_buf *buf) {
size_t len;
const char *s;
@@ -344,13 +348,13 @@ static void mp_encode_lua_string(lua_State *L, mp_buf *buf) {
mp_encode_bytes(buf,(const unsigned char*)s,len);
}
-static void mp_encode_lua_bool(lua_State *L, mp_buf *buf) {
+void mp_encode_lua_bool(lua_State *L, mp_buf *buf) {
unsigned char b = lua_toboolean(L,-1) ? 0xc3 : 0xc2;
mp_buf_append(buf,&b,1);
}
/* Lua 5.3 has a built in 64-bit integer type */
-static void mp_encode_lua_integer(lua_State *L, mp_buf *buf) {
+void mp_encode_lua_integer(lua_State *L, mp_buf *buf) {
#if (LUA_VERSION_NUM < 503) && BITS_32
lua_Number i = lua_tonumber(L,-1);
#else
@@ -362,7 +366,7 @@ static void mp_encode_lua_integer(lua_State *L, mp_buf *buf) {
/* Lua 5.2 and lower only has 64-bit doubles, so we need to
* detect if the double may be representable as an int
* for Lua < 5.3 */
-static void mp_encode_lua_number(lua_State *L, mp_buf *buf) {
+void mp_encode_lua_number(lua_State *L, mp_buf *buf) {
lua_Number n = lua_tonumber(L,-1);
if (IS_INT64_EQUIVALENT(n)) {
@@ -372,10 +376,10 @@ static void mp_encode_lua_number(lua_State *L, mp_buf *buf) {
}
}
-static void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level);
+void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level);
/* Convert a lua table into a message pack list. */
-static void mp_encode_lua_table_as_array(lua_State *L, mp_buf *buf, int level) {
+void mp_encode_lua_table_as_array(lua_State *L, mp_buf *buf, int level) {
#if LUA_VERSION_NUM < 502
size_t len = lua_objlen(L,-1), j;
#else
@@ -391,7 +395,7 @@ static void mp_encode_lua_table_as_array(lua_State *L, mp_buf *buf, int level) {
}
/* Convert a lua table into a message pack key-value map. */
-static void mp_encode_lua_table_as_map(lua_State *L, mp_buf *buf, int level) {
+void mp_encode_lua_table_as_map(lua_State *L, mp_buf *buf, int level) {
size_t len = 0;
/* First step: count keys into table. No other way to do it with the
@@ -418,7 +422,7 @@ static void mp_encode_lua_table_as_map(lua_State *L, mp_buf *buf, int level) {
/* Returns true if the Lua table on top of the stack is exclusively composed
* of keys from numerical keys from 1 up to N, with N being the total number
* of elements, without any hole in the middle. */
-static int table_is_an_array(lua_State *L) {
+int table_is_an_array(lua_State *L) {
int count = 0, max = 0;
#if LUA_VERSION_NUM < 503
lua_Number n;
@@ -461,14 +465,14 @@ static int table_is_an_array(lua_State *L) {
/* If the length operator returns non-zero, that is, there is at least
* an object at key '1', we serialize to message pack list. Otherwise
* we use a map. */
-static void mp_encode_lua_table(lua_State *L, mp_buf *buf, int level) {
+void mp_encode_lua_table(lua_State *L, mp_buf *buf, int level) {
if (table_is_an_array(L))
mp_encode_lua_table_as_array(L,buf,level);
else
mp_encode_lua_table_as_map(L,buf,level);
}
-static void mp_encode_lua_null(lua_State *L, mp_buf *buf) {
+void mp_encode_lua_null(lua_State *L, mp_buf *buf) {
unsigned char b[1];
(void)L;
@@ -476,7 +480,7 @@ static void mp_encode_lua_null(lua_State *L, mp_buf *buf) {
mp_buf_append(buf,b,1);
}
-static void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level) {
+void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level) {
int t = lua_type(L,-1);
/* Limit the encoding of nested tables to a specified maximum depth, so that
@@ -506,7 +510,7 @@ static void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level) {
* Packs all arguments as a stream for multiple upacking later.
* Returns error if no arguments provided.
*/
-static int mp_pack(lua_State *L) {
+int mp_pack(lua_State *L) {
int nargs = lua_gettop(L);
int i;
mp_buf *buf;
@@ -687,6 +691,15 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
mp_cur_consume(c,9);
}
break;
+ case 0xd9: /* raw 8 */
+ mp_cur_need(c,2);
+ {
+ size_t l = c->p[1];
+ mp_cur_need(c,2+l);
+ lua_pushlstring(L,(char*)c->p+2,l);
+ mp_cur_consume(c,2+l);
+ }
+ break;
case 0xda: /* raw 16 */
mp_cur_need(c,3);
{
@@ -773,7 +786,7 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
}
}
-static int mp_unpack_full(lua_State *L, int limit, int offset) {
+int mp_unpack_full(lua_State *L, int limit, int offset) {
size_t len;
const char *s;
mp_cur c;
@@ -826,18 +839,18 @@ static int mp_unpack_full(lua_State *L, int limit, int offset) {
return cnt;
}
-static int mp_unpack(lua_State *L) {
+int mp_unpack(lua_State *L) {
return mp_unpack_full(L, 0, 0);
}
-static int mp_unpack_one(lua_State *L) {
+int mp_unpack_one(lua_State *L) {
int offset = luaL_optinteger(L, 2, 0);
/* Variable pop because offset may not exist */
lua_pop(L, lua_gettop(L)-1);
return mp_unpack_full(L, 1, offset);
}
-static int mp_unpack_limit(lua_State *L) {
+int mp_unpack_limit(lua_State *L) {
int limit = luaL_checkinteger(L, 2);
int offset = luaL_optinteger(L, 3, 0);
/* Variable pop because offset may not exist */
@@ -846,7 +859,7 @@ static int mp_unpack_limit(lua_State *L) {
return mp_unpack_full(L, limit, offset);
}
-static int mp_safe(lua_State *L) {
+int mp_safe(lua_State *L) {
int argc, err, total_results;
argc = lua_gettop(L);
@@ -869,7 +882,7 @@ static int mp_safe(lua_State *L) {
}
/* -------------------------------------------------------------------------- */
-static const struct luaL_Reg cmds[] = {
+const struct luaL_Reg cmds[] = {
{"pack", mp_pack},
{"unpack", mp_unpack},
{"unpack_one", mp_unpack_one},
@@ -877,7 +890,7 @@ static const struct luaL_Reg cmds[] = {
{0}
};
-static int luaopen_create(lua_State *L) {
+int luaopen_create(lua_State *L) {
int i;
/* Manually construct our module table instead of
* relying on _register or _newlib */
diff --git a/redis.conf b/redis.conf
index d0684e86a..0f164af14 100644
--- a/redis.conf
+++ b/redis.conf
@@ -55,7 +55,7 @@
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
bind 127.0.0.1
-# Accept connections on the specified port, default is 6379.
+# Accept connections on the specified port, default is 6379 (IANA #815344).
# If port 0 is specified Redis will not listen on a TCP socket.
port 6379
@@ -113,8 +113,15 @@ daemonize no
# They do not enable continuous liveness pings back to your supervisor.
supervised no
-# When running daemonized, Redis writes a pid file in /var/run/redis.pid by
-# default. You can specify a custom pid file location here.
+# If a pid file is specified, Redis writes it where specified at startup
+# and removes it at exit.
+#
+# When the server runs non daemonized, no pid file is created if none is
+# specified in the configuration. When the server is daemonized, the pid file
+# is used even if not specified, defaulting to "/var/run/redis.pid".
+#
+# Creating a pid file is best effort: if Redis is not able to create it
+# nothing bad happens, the server will start and run normally.
pidfile /var/run/redis.pid
# Specify the server verbosity level.
diff --git a/src/Makefile b/src/Makefile
index 271ab34d8..650d438f7 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -15,7 +15,7 @@
release_hdr := $(shell sh -c './mkreleasehdr.sh')
uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
OPTIMIZATION?=-O2
-DEPENDENCY_TARGETS=hiredis linenoise lua
+DEPENDENCY_TARGETS=hiredis linenoise lua geohash-int
# Default settings
STD=-std=c99 -pedantic -DREDIS_STATIC=''
@@ -53,7 +53,7 @@ endif
# Override default settings if possible
-include .make-settings
-FINAL_CFLAGS=$(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) $(REDIS_CFLAGS)
+FINAL_CFLAGS=$(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) $(REDIS_CFLAGS) -I../deps/geohash-int
FINAL_LDFLAGS=$(LDFLAGS) $(REDIS_LDFLAGS) $(DEBUG)
FINAL_LIBS=-lm
DEBUG=-g -ggdb
@@ -117,7 +117,8 @@ endif
REDIS_SERVER_NAME=redis-server
REDIS_SENTINEL_NAME=redis-sentinel
-REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o
+REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o
+REDIS_GEOHASH_OBJ=../deps/geohash-int/geohash.o ../deps/geohash-int/geohash_helper.o
REDIS_CLI_NAME=redis-cli
REDIS_CLI_OBJ=anet.o sds.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o
REDIS_BENCHMARK_NAME=redis-benchmark
@@ -171,7 +172,7 @@ endif
# redis-server
$(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ)
- $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a $(FINAL_LIBS)
+ $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a $(REDIS_GEOHASH_OBJ) $(FINAL_LIBS)
# redis-sentinel
$(REDIS_SENTINEL_NAME): $(REDIS_SERVER_NAME)
diff --git a/src/anet.c b/src/anet.c
index 2912d86d8..381d5232d 100644
--- a/src/anet.c
+++ b/src/anet.c
@@ -278,6 +278,7 @@ static int anetCreateSocket(char *err, int domain) {
#define ANET_CONNECT_NONE 0
#define ANET_CONNECT_NONBLOCK 1
+#define ANET_CONNECT_BE_BINDING 2 /* Best effort binding. */
static int anetTcpGenericConnect(char *err, char *addr, int port,
char *source_addr, int flags)
{
@@ -309,7 +310,7 @@ static int anetTcpGenericConnect(char *err, char *addr, int port,
if ((rv = getaddrinfo(source_addr, NULL, &hints, &bservinfo)) != 0)
{
anetSetError(err, "%s", gai_strerror(rv));
- goto end;
+ goto error;
}
for (b = bservinfo; b != NULL; b = b->ai_next) {
if (bind(s,b->ai_addr,b->ai_addrlen) != -1) {
@@ -320,7 +321,7 @@ static int anetTcpGenericConnect(char *err, char *addr, int port,
freeaddrinfo(bservinfo);
if (!bound) {
anetSetError(err, "bind: %s", strerror(errno));
- goto end;
+ goto error;
}
}
if (connect(s,p->ai_addr,p->ai_addrlen) == -1) {
@@ -345,9 +346,17 @@ error:
close(s);
s = ANET_ERR;
}
+
end:
freeaddrinfo(servinfo);
- return s;
+
+ /* Handle best effort binding: if a binding address was used, but it is
+ * not possible to create a socket, try again without a binding address. */
+ if (s == ANET_ERR && source_addr && (flags & ANET_CONNECT_BE_BINDING)) {
+ return anetTcpGenericConnect(err,addr,port,NULL,flags);
+ } else {
+ return s;
+ }
}
int anetTcpConnect(char *err, char *addr, int port)
@@ -360,9 +369,18 @@ int anetTcpNonBlockConnect(char *err, char *addr, int port)
return anetTcpGenericConnect(err,addr,port,NULL,ANET_CONNECT_NONBLOCK);
}
-int anetTcpNonBlockBindConnect(char *err, char *addr, int port, char *source_addr)
+int anetTcpNonBlockBindConnect(char *err, char *addr, int port,
+ char *source_addr)
+{
+ return anetTcpGenericConnect(err,addr,port,source_addr,
+ ANET_CONNECT_NONBLOCK);
+}
+
+int anetTcpNonBlockBestEffortBindConnect(char *err, char *addr, int port,
+ char *source_addr)
{
- return anetTcpGenericConnect(err,addr,port,source_addr,ANET_CONNECT_NONBLOCK);
+ return anetTcpGenericConnect(err,addr,port,source_addr,
+ ANET_CONNECT_NONBLOCK|ANET_CONNECT_BE_BINDING);
}
int anetUnixGenericConnect(char *err, char *path, int flags)
@@ -416,7 +434,7 @@ int anetRead(int fd, char *buf, int count)
return totlen;
}
-/* Like write(2) but make sure 'count' is read before to return
+/* Like write(2) but make sure 'count' is written before to return
* (unless error is encountered) */
int anetWrite(int fd, char *buf, int count)
{
diff --git a/src/anet.h b/src/anet.h
index ff5416599..f19979fe2 100644
--- a/src/anet.h
+++ b/src/anet.h
@@ -50,6 +50,7 @@
int anetTcpConnect(char *err, char *addr, int port);
int anetTcpNonBlockConnect(char *err, char *addr, int port);
int anetTcpNonBlockBindConnect(char *err, char *addr, int port, char *source_addr);
+int anetTcpNonBlockBestEffortBindConnect(char *err, char *addr, int port, char *source_addr);
int anetUnixConnect(char *err, char *path);
int anetUnixNonBlockConnect(char *err, char *path);
int anetRead(int fd, char *buf, int count);
diff --git a/src/aof.c b/src/aof.c
index cdf103331..05d1d4dff 100644
--- a/src/aof.c
+++ b/src/aof.c
@@ -1146,9 +1146,9 @@ int rewriteAppendOnlyFile(char *filename) {
return REDIS_OK;
werr:
+ redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno));
fclose(fp);
unlink(tmpfile);
- redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno));
if (di) dictReleaseIterator(di);
return REDIS_ERR;
}
diff --git a/src/bio.c b/src/bio.c
index 4bd5a17c6..27bc9abfc 100644
--- a/src/bio.c
+++ b/src/bio.c
@@ -142,6 +142,13 @@ void *bioProcessBackgroundJobs(void *arg) {
unsigned long type = (unsigned long) arg;
sigset_t sigset;
+ /* Check that the type is within the right interval. */
+ if (type >= REDIS_BIO_NUM_OPS) {
+ redisLog(REDIS_WARNING,
+ "Warning: bio thread started with wrong type %lu",type);
+ return NULL;
+ }
+
/* Make the thread killable at any time, so that bioKillThreads()
* can work reliably. */
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
diff --git a/src/blocked.c b/src/blocked.c
index ef0d5246d..2ec2cf626 100644
--- a/src/blocked.c
+++ b/src/blocked.c
@@ -59,6 +59,8 @@
* When implementing a new type of blocking opeation, the implementation
* should modify unblockClient() and replyToBlockedClientTimedOut() in order
* to handle the btype-specific behavior of this two functions.
+ * If the blocking operation waits for certain keys to change state, the
+ * clusterRedirectBlockedClientIfNeeded() function should also be updated.
*/
#include "redis.h"
@@ -114,11 +116,15 @@ void processUnblockedClients(void) {
c = ln->value;
listDelNode(server.unblocked_clients,ln);
c->flags &= ~REDIS_UNBLOCKED;
- c->btype = REDIS_BLOCKED_NONE;
- /* Process remaining data in the input buffer. */
- if (c->querybuf && sdslen(c->querybuf) > 0) {
- processInputBuffer(c);
+ /* Process remaining data in the input buffer, unless the client
+ * is blocked again. Actually processInputBuffer() checks that the
+ * client is not blocked before to proceed, but things may change and
+ * the code is conceptually more correct this way. */
+ if (!(c->flags & REDIS_BLOCKED)) {
+ if (c->querybuf && sdslen(c->querybuf) > 0) {
+ processInputBuffer(c);
+ }
}
}
}
@@ -136,10 +142,14 @@ void unblockClient(redisClient *c) {
/* Clear the flags, and put the client in the unblocked list so that
* we'll process new commands in its query buffer ASAP. */
c->flags &= ~REDIS_BLOCKED;
- c->flags |= REDIS_UNBLOCKED;
c->btype = REDIS_BLOCKED_NONE;
server.bpop_blocked_clients--;
- listAddNodeTail(server.unblocked_clients,c);
+ /* The client may already be into the unblocked list because of a previous
+ * blocking operation, don't add back it into the list multiple times. */
+ if (!(c->flags & REDIS_UNBLOCKED)) {
+ c->flags |= REDIS_UNBLOCKED;
+ listAddNodeTail(server.unblocked_clients,c);
+ }
}
/* This function gets called when a blocked client timed out in order to
@@ -154,3 +164,27 @@ void replyToBlockedClientTimedOut(redisClient *c) {
}
}
+/* Mass-unblock clients because something changed in the instance that makes
+ * blocking no longer safe. For example clients blocked in list operations
+ * in an instance which turns from master to slave is unsafe, so this function
+ * is called when a master turns into a slave.
+ *
+ * The semantics is to send an -UNBLOCKED error to the client, disconnecting
+ * it at the same time. */
+void disconnectAllBlockedClients(void) {
+ listNode *ln;
+ listIter li;
+
+ listRewind(server.clients,&li);
+ while((ln = listNext(&li))) {
+ redisClient *c = listNodeValue(ln);
+
+ if (c->flags & REDIS_BLOCKED) {
+ addReplySds(c,sdsnew(
+ "-UNBLOCKED force unblock from blocking operation, "
+ "instance state changed (master -> slave?)\r\n"));
+ unblockClient(c);
+ c->flags |= REDIS_CLOSE_AFTER_REPLY;
+ }
+ }
+}
diff --git a/src/cluster.c b/src/cluster.c
index df1498bdd..6280677ae 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -74,27 +74,13 @@ void clusterCloseAllSlots(void);
void clusterSetNodeAsMaster(clusterNode *n);
void clusterDelNode(clusterNode *delnode);
sds representRedisNodeFlags(sds ci, uint16_t flags);
+uint64_t clusterGetMaxEpoch(void);
+int clusterBumpConfigEpochWithoutConsensus(void);
/* -----------------------------------------------------------------------------
* Initialization
* -------------------------------------------------------------------------- */
-/* Return the greatest configEpoch found in the cluster. */
-uint64_t clusterGetMaxEpoch(void) {
- uint64_t max = 0;
- dictIterator *di;
- dictEntry *de;
-
- di = dictGetSafeIterator(server.cluster->nodes);
- while((de = dictNext(di)) != NULL) {
- clusterNode *node = dictGetVal(de);
- if (node->configEpoch > max) max = node->configEpoch;
- }
- dictReleaseIterator(di);
- if (max < server.cluster->currentEpoch) max = server.cluster->currentEpoch;
- return max;
-}
-
/* Load the cluster config from 'filename'.
*
* If the file does not exist or is zero-length (this may happen because
@@ -372,6 +358,11 @@ void clusterSaveConfigOrDie(int do_fsync) {
* On success REDIS_OK is returned, otherwise an error is logged and
* the function returns REDIS_ERR to signal a lock was not acquired. */
int clusterLockConfig(char *filename) {
+/* flock() does not exist on Solaris
+ * and a fcntl-based solution won't help, as we constantly re-open that file,
+ * which will release _all_ locks anyway
+ */
+#if !defined(__sun)
/* To lock it, we need to open the file in a way it is created if
* it does not exist, otherwise there is a race condition with other
* processes. */
@@ -399,6 +390,8 @@ int clusterLockConfig(char *filename) {
}
/* Lock acquired: leak the 'fd' by not closing it, so that we'll retain the
* lock to the file as long as the process exists. */
+#endif /* __sun */
+
return REDIS_OK;
}
@@ -928,6 +921,138 @@ void clusterRenameNode(clusterNode *node, char *newname) {
}
/* -----------------------------------------------------------------------------
+ * CLUSTER config epoch handling
+ * -------------------------------------------------------------------------- */
+
+/* Return the greatest configEpoch found in the cluster, or the current
+ * epoch if greater than any node configEpoch. */
+uint64_t clusterGetMaxEpoch(void) {
+ uint64_t max = 0;
+ dictIterator *di;
+ dictEntry *de;
+
+ di = dictGetSafeIterator(server.cluster->nodes);
+ while((de = dictNext(di)) != NULL) {
+ clusterNode *node = dictGetVal(de);
+ if (node->configEpoch > max) max = node->configEpoch;
+ }
+ dictReleaseIterator(di);
+ if (max < server.cluster->currentEpoch) max = server.cluster->currentEpoch;
+ return max;
+}
+
+/* If this node epoch is zero or is not already the greatest across the
+ * cluster (from the POV of the local configuration), this function will:
+ *
+ * 1) Generate a new config epoch increment the current epoch.
+ * 2) Assign the new epoch to this node, WITHOUT any consensus.
+ * 3) Persist the configuration on disk before sending packets with the
+ * new configuration.
+ *
+ * If the new config epoch is generated and assigend, REDIS_OK is returned,
+ * otherwise REDIS_ERR is returned (since the node has already the greatest
+ * configuration around) and no operation is performed.
+ *
+ * Important note: this function violates the principle that config epochs
+ * should be generated with consensus and should be unique across the cluster.
+ * However Redis Cluster uses this auto-generated new config epochs in two
+ * cases:
+ *
+ * 1) When slots are closed after importing. Otherwise resharding would be
+ * too exansive.
+ * 2) When CLUSTER FAILOVER is called with options that force a slave to
+ * failover its master even if there is not master majority able to
+ * create a new configuration epoch.
+ *
+ * Redis Cluster does not explode using this function, even in the case of
+ * a collision between this node and another node, generating the same
+ * configuration epoch unilaterally, because the config epoch conflict
+ * resolution algorithm will eventually move colliding nodes to different
+ * config epochs. However usign this function may violate the "last failover
+ * wins" rule, so should only be used with care. */
+int clusterBumpConfigEpochWithoutConsensus(void) {
+ uint64_t maxEpoch = clusterGetMaxEpoch();
+
+ if (myself->configEpoch == 0 ||
+ myself->configEpoch != maxEpoch)
+ {
+ server.cluster->currentEpoch++;
+ myself->configEpoch = server.cluster->currentEpoch;
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_FSYNC_CONFIG);
+ redisLog(REDIS_WARNING,
+ "New configEpoch set to %llu",
+ (unsigned long long) myself->configEpoch);
+ return REDIS_OK;
+ } else {
+ return REDIS_ERR;
+ }
+}
+
+/* This function is called when this node is a master, and we receive from
+ * another master a configuration epoch that is equal to our configuration
+ * epoch.
+ *
+ * BACKGROUND
+ *
+ * It is not possible that different slaves get the same config
+ * epoch during a failover election, because the slaves need to get voted
+ * by a majority. However when we perform a manual resharding of the cluster
+ * the node will assign a configuration epoch to itself without to ask
+ * for agreement. Usually resharding happens when the cluster is working well
+ * and is supervised by the sysadmin, however it is possible for a failover
+ * to happen exactly while the node we are resharding a slot to assigns itself
+ * a new configuration epoch, but before it is able to propagate it.
+ *
+ * So technically it is possible in this condition that two nodes end with
+ * the same configuration epoch.
+ *
+ * Another possibility is that there are bugs in the implementation causing
+ * this to happen.
+ *
+ * Moreover when a new cluster is created, all the nodes start with the same
+ * configEpoch. This collision resolution code allows nodes to automatically
+ * end with a different configEpoch at startup automatically.
+ *
+ * In all the cases, we want a mechanism that resolves this issue automatically
+ * as a safeguard. The same configuration epoch for masters serving different
+ * set of slots is not harmful, but it is if the nodes end serving the same
+ * slots for some reason (manual errors or software bugs) without a proper
+ * failover procedure.
+ *
+ * In general we want a system that eventually always ends with different
+ * masters having different configuration epochs whatever happened, since
+ * nothign is worse than a split-brain condition in a distributed system.
+ *
+ * BEHAVIOR
+ *
+ * When this function gets called, what happens is that if this node
+ * has the lexicographically smaller Node ID compared to the other node
+ * with the conflicting epoch (the 'sender' node), it will assign itself
+ * the greatest configuration epoch currently detected among nodes plus 1.
+ *
+ * This means that even if there are multiple nodes colliding, the node
+ * with the greatest Node ID never moves forward, so eventually all the nodes
+ * end with a different configuration epoch.
+ */
+void clusterHandleConfigEpochCollision(clusterNode *sender) {
+ /* Prerequisites: nodes have the same configEpoch and are both masters. */
+ if (sender->configEpoch != myself->configEpoch ||
+ !nodeIsMaster(sender) || !nodeIsMaster(myself)) return;
+ /* Don't act if the colliding node has a smaller Node ID. */
+ if (memcmp(sender->name,myself->name,REDIS_CLUSTER_NAMELEN) <= 0) return;
+ /* Get the next ID available at the best of this node knowledge. */
+ server.cluster->currentEpoch++;
+ myself->configEpoch = server.cluster->currentEpoch;
+ clusterSaveConfigOrDie(1);
+ redisLog(REDIS_VERBOSE,
+ "WARNING: configEpoch collision with node %.40s."
+ " configEpoch set to %llu",
+ sender->name,
+ (unsigned long long) myself->configEpoch);
+}
+
+/* -----------------------------------------------------------------------------
* CLUSTER nodes blacklist
*
* The nodes blacklist is just a way to ensure that a given node with a given
@@ -1399,69 +1524,6 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
}
}
-/* This function is called when this node is a master, and we receive from
- * another master a configuration epoch that is equal to our configuration
- * epoch.
- *
- * BACKGROUND
- *
- * It is not possible that different slaves get the same config
- * epoch during a failover election, because the slaves need to get voted
- * by a majority. However when we perform a manual resharding of the cluster
- * the node will assign a configuration epoch to itself without to ask
- * for agreement. Usually resharding happens when the cluster is working well
- * and is supervised by the sysadmin, however it is possible for a failover
- * to happen exactly while the node we are resharding a slot to assigns itself
- * a new configuration epoch, but before it is able to propagate it.
- *
- * So technically it is possible in this condition that two nodes end with
- * the same configuration epoch.
- *
- * Another possibility is that there are bugs in the implementation causing
- * this to happen.
- *
- * Moreover when a new cluster is created, all the nodes start with the same
- * configEpoch. This collision resolution code allows nodes to automatically
- * end with a different configEpoch at startup automatically.
- *
- * In all the cases, we want a mechanism that resolves this issue automatically
- * as a safeguard. The same configuration epoch for masters serving different
- * set of slots is not harmful, but it is if the nodes end serving the same
- * slots for some reason (manual errors or software bugs) without a proper
- * failover procedure.
- *
- * In general we want a system that eventually always ends with different
- * masters having different configuration epochs whatever happened, since
- * nothign is worse than a split-brain condition in a distributed system.
- *
- * BEHAVIOR
- *
- * When this function gets called, what happens is that if this node
- * has the lexicographically smaller Node ID compared to the other node
- * with the conflicting epoch (the 'sender' node), it will assign itself
- * the greatest configuration epoch currently detected among nodes plus 1.
- *
- * This means that even if there are multiple nodes colliding, the node
- * with the greatest Node ID never moves forward, so eventually all the nodes
- * end with a different configuration epoch.
- */
-void clusterHandleConfigEpochCollision(clusterNode *sender) {
- /* Prerequisites: nodes have the same configEpoch and are both masters. */
- if (sender->configEpoch != myself->configEpoch ||
- !nodeIsMaster(sender) || !nodeIsMaster(myself)) return;
- /* Don't act if the colliding node has a smaller Node ID. */
- if (memcmp(sender->name,myself->name,REDIS_CLUSTER_NAMELEN) <= 0) return;
- /* Get the next ID available at the best of this node knowledge. */
- server.cluster->currentEpoch++;
- myself->configEpoch = server.cluster->currentEpoch;
- clusterSaveConfigOrDie(1);
- redisLog(REDIS_VERBOSE,
- "WARNING: configEpoch collision with node %.40s."
- " configEpoch set to %llu",
- sender->name,
- (unsigned long long) myself->configEpoch);
-}
-
/* When this function is called, there is a packet to process starting
* at node->rcvbuf. Releasing the buffer is up to the caller, so this
* function should just handle the higher level stuff of processing the
@@ -2582,6 +2644,42 @@ void clusterLogCantFailover(int reason) {
redisLog(REDIS_WARNING,"Currently unable to failover: %s", msg);
}
+/* This function implements the final part of automatic and manual failovers,
+ * where the slave grabs its master's hash slots, and propagates the new
+ * configuration.
+ *
+ * Note that it's up to the caller to be sure that the node got a new
+ * configuration epoch already. */
+void clusterFailoverReplaceYourMaster(void) {
+ int j;
+ clusterNode *oldmaster = myself->slaveof;
+
+ if (nodeIsMaster(myself) || oldmaster == NULL) return;
+
+ /* 1) Turn this node into a master. */
+ clusterSetNodeAsMaster(myself);
+ replicationUnsetMaster();
+
+ /* 2) Claim all the slots assigned to our master. */
+ for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ if (clusterNodeGetSlotBit(oldmaster,j)) {
+ clusterDelSlot(j);
+ clusterAddSlot(myself,j);
+ }
+ }
+
+ /* 3) Update state and save config. */
+ clusterUpdateState();
+ clusterSaveConfigOrDie(1);
+
+ /* 4) Pong all the other nodes so that they can update the state
+ * accordingly and detect that we switched to master role. */
+ clusterBroadcastPong(CLUSTER_BROADCAST_ALL);
+
+ /* 5) If there was a manual failover in progress, clear the state. */
+ resetManualFailover();
+}
+
/* This function is called if we are a slave node and our master serving
* a non-zero amount of hash slots is in FAIL state.
*
@@ -2596,7 +2694,6 @@ void clusterHandleSlaveFailover(void) {
int needed_quorum = (server.cluster->size / 2) + 1;
int manual_failover = server.cluster->mf_end != 0 &&
server.cluster->mf_can_start;
- int j;
mstime_t auth_timeout, auth_retry_time;
server.cluster->todo_before_sleep &= ~CLUSTER_TODO_HANDLE_FAILOVER;
@@ -2738,26 +2835,12 @@ void clusterHandleSlaveFailover(void) {
/* Check if we reached the quorum. */
if (server.cluster->failover_auth_count >= needed_quorum) {
- clusterNode *oldmaster = myself->slaveof;
+ /* We have the quorum, we can finally failover the master. */
redisLog(REDIS_WARNING,
"Failover election won: I'm the new master.");
- /* We have the quorum, perform all the steps to correctly promote
- * this slave to a master.
- *
- * 1) Turn this node into a master. */
- clusterSetNodeAsMaster(myself);
- replicationUnsetMaster();
-
- /* 2) Claim all the slots assigned to our master. */
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
- if (clusterNodeGetSlotBit(oldmaster,j)) {
- clusterDelSlot(j);
- clusterAddSlot(myself,j);
- }
- }
- /* 3) Update my configEpoch to the epoch of the election. */
+ /* Update my configEpoch to the epoch of the election. */
if (myself->configEpoch < server.cluster->failover_auth_epoch) {
myself->configEpoch = server.cluster->failover_auth_epoch;
redisLog(REDIS_WARNING,
@@ -2765,16 +2848,8 @@ void clusterHandleSlaveFailover(void) {
(unsigned long long) myself->configEpoch);
}
- /* 4) Update state and save config. */
- clusterUpdateState();
- clusterSaveConfigOrDie(1);
-
- /* 5) Pong all the other nodes so that they can update the state
- * accordingly and detect that we switched to master role. */
- clusterBroadcastPong(CLUSTER_BROADCAST_ALL);
-
- /* 6) If there was a manual failover in progress, clear the state. */
- resetManualFailover();
+ /* Take responsability for the cluster slots. */
+ clusterFailoverReplaceYourMaster();
} else {
clusterLogCantFailover(REDIS_CLUSTER_CANT_FAILOVER_WAITING_VOTES);
}
@@ -3567,7 +3642,7 @@ sds clusterGenNodeDescription(clusterNode *node) {
else
ci = sdscatlen(ci," - ",3);
- /* Latency from the POV of this node, link status */
+ /* Latency from the POV of this node, config epoch, link status */
ci = sdscatprintf(ci,"%lld %lld %llu %s",
(long long) node->ping_sent,
(long long) node->pong_received,
@@ -3760,6 +3835,9 @@ void clusterCommand(redisClient *c) {
o = createObject(REDIS_STRING,ci);
addReplyBulk(c,o);
decrRefCount(o);
+ } else if (!strcasecmp(c->argv[1]->ptr,"myid") && c->argc == 2) {
+ /* CLUSTER MYID */
+ addReplyBulkCBuffer(c,myself->name, REDIS_CLUSTER_NAMELEN);
} else if (!strcasecmp(c->argv[1]->ptr,"slots") && c->argc == 2) {
/* CLUSTER SLOTS */
clusterReplyMultiBulkSlots(c);
@@ -3899,17 +3977,9 @@ void clusterCommand(redisClient *c) {
* failover happens at the same time we close the slot, the
* configEpoch collision resolution will fix it assigning
* a different epoch to each node. */
- uint64_t maxEpoch = clusterGetMaxEpoch();
-
- if (myself->configEpoch == 0 ||
- myself->configEpoch != maxEpoch)
- {
- server.cluster->currentEpoch++;
- myself->configEpoch = server.cluster->currentEpoch;
- clusterDoBeforeSleep(CLUSTER_TODO_FSYNC_CONFIG);
+ if (clusterBumpConfigEpochWithoutConsensus() == REDIS_OK) {
redisLog(REDIS_WARNING,
- "configEpoch set to %llu after importing slot %d",
- (unsigned long long) myself->configEpoch, slot);
+ "configEpoch updated after importing slot %d", slot);
}
server.cluster->importing_slots_from[slot] = NULL;
}
@@ -3970,7 +4040,10 @@ void clusterCommand(redisClient *c) {
server.cluster->stats_bus_messages_sent,
server.cluster->stats_bus_messages_received
);
- addReplyBulkSds(c, info);
+ addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
+ (unsigned long)sdslen(info)));
+ addReplySds(c,info);
+ addReply(c,shared.crlf);
} else if (!strcasecmp(c->argv[1]->ptr,"saveconfig") && c->argc == 2) {
int retval = clusterSaveConfig(1);
@@ -4052,7 +4125,7 @@ void clusterCommand(redisClient *c) {
}
/* Can't replicate a slave. */
- if (n->slaveof != NULL) {
+ if (nodeIsSlave(n)) {
addReplyError(c,"I can only replicate a master, not a slave.");
return;
}
@@ -4109,24 +4182,31 @@ void clusterCommand(redisClient *c) {
} else if (!strcasecmp(c->argv[1]->ptr,"failover") &&
(c->argc == 2 || c->argc == 3))
{
- /* CLUSTER FAILOVER [FORCE] */
- int force = 0;
+ /* CLUSTER FAILOVER [FORCE|TAKEOVER] */
+ int force = 0, takeover = 0;
if (c->argc == 3) {
if (!strcasecmp(c->argv[2]->ptr,"force")) {
force = 1;
+ } else if (!strcasecmp(c->argv[2]->ptr,"takeover")) {
+ takeover = 1;
+ force = 1; /* Takeover also implies force. */
} else {
addReply(c,shared.syntaxerr);
return;
}
}
+ /* Check preconditions. */
if (nodeIsMaster(myself)) {
addReplyError(c,"You should send CLUSTER FAILOVER to a slave");
return;
+ } else if (myself->slaveof == NULL) {
+ addReplyError(c,"I'm a slave but my master is unknown to me");
+ return;
} else if (!force &&
- (myself->slaveof == NULL || nodeFailed(myself->slaveof) ||
- myself->slaveof->link == NULL))
+ (nodeFailed(myself->slaveof) ||
+ myself->slaveof->link == NULL))
{
addReplyError(c,"Master is down or failed, "
"please use CLUSTER FAILOVER FORCE");
@@ -4135,15 +4215,24 @@ void clusterCommand(redisClient *c) {
resetManualFailover();
server.cluster->mf_end = mstime() + REDIS_CLUSTER_MF_TIMEOUT;
- /* If this is a forced failover, we don't need to talk with our master
- * to agree about the offset. We just failover taking over it without
- * coordination. */
- if (force) {
+ if (takeover) {
+ /* A takeover does not perform any initial check. It just
+ * generates a new configuration epoch for this node without
+ * consensus, claims the master's slots, and broadcast the new
+ * configuration. */
+ redisLog(REDIS_WARNING,"Taking over the master (user request).");
+ clusterBumpConfigEpochWithoutConsensus();
+ clusterFailoverReplaceYourMaster();
+ } else if (force) {
+ /* If this is a forced failover, we don't need to talk with our
+ * master to agree about the offset. We just failover taking over
+ * it without coordination. */
+ redisLog(REDIS_WARNING,"Forced failover user request accepted.");
server.cluster->mf_can_start = 1;
} else {
+ redisLog(REDIS_WARNING,"Manual failover user request accepted.");
clusterSendMFStart(myself->slaveof);
}
- redisLog(REDIS_WARNING,"Manual failover user request accepted.");
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"set-config-epoch") && c->argc == 3)
{
@@ -4404,8 +4493,8 @@ migrateCachedSocket* migrateGetSocket(redisClient *c, robj *host, robj *port, lo
}
/* Create the socket */
- fd = anetTcpNonBlockBindConnect(server.neterr,c->argv[1]->ptr,
- atoi(c->argv[2]->ptr),REDIS_BIND_ADDR);
+ fd = anetTcpNonBlockConnect(server.neterr,c->argv[1]->ptr,
+ atoi(c->argv[2]->ptr));
if (fd == -1) {
sdsfree(name);
addReplyErrorFormat(c,"Can't connect to target node: %s",
@@ -4507,7 +4596,7 @@ try_again:
/* Check if the key is here. If not we reply with success as there is
* nothing to migrate (for instance the key expired in the meantime), but
* we include such information in the reply string. */
- if ((o = lookupKeyWrite(c->db,c->argv[3])) == NULL) {
+ if ((o = lookupKeyRead(c->db,c->argv[3])) == NULL) {
addReplySds(c,sdsnew("+NOKEY\r\n"));
return;
}
@@ -4560,7 +4649,7 @@ try_again:
{
sds buf = cmd.io.buffer.ptr;
size_t pos = 0, towrite;
- ssize_t nwritten = 0;
+ int nwritten = 0;
while ((towrite = sdslen(buf)-pos) > 0) {
towrite = (towrite > (64*1024) ? (64*1024) : towrite);
@@ -4684,7 +4773,12 @@ void readwriteCommand(redisClient *c) {
*
* REDIS_CLUSTER_REDIR_UNSTABLE if the request contains mutliple keys
* belonging to the same slot, but the slot is not stable (in migration or
- * importing state, likely because a resharding is in progress). */
+ * importing state, likely because a resharding is in progress).
+ *
+ * REDIS_CLUSTER_REDIR_DOWN_UNBOUND if the request addresses a slot which is
+ * not bound to any node. In this case the cluster global state should be
+ * already "down" but it is fragile to rely on the update of the global state,
+ * so we also handle it here. */
clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *error_code) {
clusterNode *n = NULL;
robj *firstkey = NULL;
@@ -4738,7 +4832,18 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
firstkey = thiskey;
slot = thisslot;
n = server.cluster->slots[slot];
- redisAssertWithInfo(c,firstkey,n != NULL);
+
+ /* Error: If a slot is not served, we are in "cluster down"
+ * state. However the state is yet to be updated, so this was
+ * not trapped earlier in processCommand(). Report the same
+ * error to the client. */
+ if (n == NULL) {
+ getKeysFreeResult(keyindex);
+ if (error_code)
+ *error_code = REDIS_CLUSTER_REDIR_DOWN_UNBOUND;
+ return NULL;
+ }
+
/* If we are migrating or importing this slot, we need to check
* if we have all the keys in the request (the only way we
* can safely serve the request, otherwise we return a TRYAGAIN
@@ -4827,3 +4932,83 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
if (n != myself && error_code) *error_code = REDIS_CLUSTER_REDIR_MOVED;
return n;
}
+
+/* Send the client the right redirection code, according to error_code
+ * that should be set to one of REDIS_CLUSTER_REDIR_* macros.
+ *
+ * If REDIS_CLUSTER_REDIR_ASK or REDIS_CLUSTER_REDIR_MOVED error codes
+ * are used, then the node 'n' should not be NULL, but should be the
+ * node we want to mention in the redirection. Moreover hashslot should
+ * be set to the hash slot that caused the redirection. */
+void clusterRedirectClient(redisClient *c, clusterNode *n, int hashslot, int error_code) {
+ if (error_code == REDIS_CLUSTER_REDIR_CROSS_SLOT) {
+ addReplySds(c,sdsnew("-CROSSSLOT Keys in request don't hash to the same slot\r\n"));
+ } else if (error_code == REDIS_CLUSTER_REDIR_UNSTABLE) {
+ /* The request spawns mutliple keys in the same slot,
+ * but the slot is not "stable" currently as there is
+ * a migration or import in progress. */
+ addReplySds(c,sdsnew("-TRYAGAIN Multiple keys request during rehashing of slot\r\n"));
+ } else if (error_code == REDIS_CLUSTER_REDIR_DOWN_STATE) {
+ addReplySds(c,sdsnew("-CLUSTERDOWN The cluster is down\r\n"));
+ } else if (error_code == REDIS_CLUSTER_REDIR_DOWN_UNBOUND) {
+ addReplySds(c,sdsnew("-CLUSTERDOWN Hash slot not served\r\n"));
+ } else if (error_code == REDIS_CLUSTER_REDIR_MOVED ||
+ error_code == REDIS_CLUSTER_REDIR_ASK)
+ {
+ addReplySds(c,sdscatprintf(sdsempty(),
+ "-%s %d %s:%d\r\n",
+ (error_code == REDIS_CLUSTER_REDIR_ASK) ? "ASK" : "MOVED",
+ hashslot,n->ip,n->port));
+ } else {
+ redisPanic("getNodeByQuery() unknown error.");
+ }
+}
+
+/* This function is called by the function processing clients incrementally
+ * to detect timeouts, in order to handle the following case:
+ *
+ * 1) A client blocks with BLPOP or similar blocking operation.
+ * 2) The master migrates the hash slot elsewhere or turns into a slave.
+ * 3) The client may remain blocked forever (or up to the max timeout time)
+ * waiting for a key change that will never happen.
+ *
+ * If the client is found to be blocked into an hash slot this node no
+ * longer handles, the client is sent a redirection error, and the function
+ * returns 1. Otherwise 0 is returned and no operation is performed. */
+int clusterRedirectBlockedClientIfNeeded(redisClient *c) {
+ if (c->flags & REDIS_BLOCKED && c->btype == REDIS_BLOCKED_LIST) {
+ dictEntry *de;
+ dictIterator *di;
+
+ /* If the cluster is down, unblock the client with the right error. */
+ if (server.cluster->state == REDIS_CLUSTER_FAIL) {
+ clusterRedirectClient(c,NULL,0,REDIS_CLUSTER_REDIR_DOWN_STATE);
+ return 1;
+ }
+
+ di = dictGetIterator(c->bpop.keys);
+ while((de = dictNext(di)) != NULL) {
+ robj *key = dictGetKey(de);
+ int slot = keyHashSlot((char*)key->ptr, sdslen(key->ptr));
+ clusterNode *node = server.cluster->slots[slot];
+
+ /* We send an error and unblock the client if:
+ * 1) The slot is unassigned, emitting a cluster down error.
+ * 2) The slot is not handled by this node, nor being imported. */
+ if (node != myself &&
+ server.cluster->importing_slots_from[slot] == NULL)
+ {
+ if (node == NULL) {
+ clusterRedirectClient(c,NULL,0,
+ REDIS_CLUSTER_REDIR_DOWN_UNBOUND);
+ } else {
+ clusterRedirectClient(c,node,slot,
+ REDIS_CLUSTER_REDIR_MOVED);
+ }
+ return 1;
+ }
+ }
+ dictReleaseIterator(di);
+ }
+ return 0;
+}
diff --git a/src/cluster.h b/src/cluster.h
index ef5caf0d6..bf442a222 100644
--- a/src/cluster.h
+++ b/src/cluster.h
@@ -26,10 +26,12 @@
/* Redirection errors returned by getNodeByQuery(). */
#define REDIS_CLUSTER_REDIR_NONE 0 /* Node can serve the request. */
-#define REDIS_CLUSTER_REDIR_CROSS_SLOT 1 /* Keys in different slots. */
-#define REDIS_CLUSTER_REDIR_UNSTABLE 2 /* Keys in slot resharding. */
+#define REDIS_CLUSTER_REDIR_CROSS_SLOT 1 /* -CROSSSLOT request. */
+#define REDIS_CLUSTER_REDIR_UNSTABLE 2 /* -TRYAGAIN redirection required */
#define REDIS_CLUSTER_REDIR_ASK 3 /* -ASK redirection required. */
#define REDIS_CLUSTER_REDIR_MOVED 4 /* -MOVED redirection required. */
+#define REDIS_CLUSTER_REDIR_DOWN_STATE 5 /* -CLUSTERDOWN, global state. */
+#define REDIS_CLUSTER_REDIR_DOWN_UNBOUND 6 /* -CLUSTERDOWN, unbound slot. */
struct clusterNode;
@@ -248,5 +250,7 @@ typedef struct {
/* ---------------------- API exported outside cluster.c -------------------- */
clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask);
+int clusterRedirectBlockedClientIfNeeded(redisClient *c);
+void clusterRedirectClient(redisClient *c, clusterNode *n, int hashslot, int error_code);
#endif /* __REDIS_CLUSTER_H */
diff --git a/src/config.c b/src/config.c
index 3963a1218..fd56269ef 100644
--- a/src/config.c
+++ b/src/config.c
@@ -34,10 +34,26 @@
#include <fcntl.h>
#include <sys/stat.h>
-static struct {
- const char *name;
- const int value;
-} validSyslogFacilities[] = {
+/*-----------------------------------------------------------------------------
+ * Config file name-value maps.
+ *----------------------------------------------------------------------------*/
+
+typedef struct configEnum {
+ const char *name;
+ const int val;
+} configEnum;
+
+configEnum maxmemory_policy_enum[] = {
+ {"volatile-lru", REDIS_MAXMEMORY_VOLATILE_LRU},
+ {"volatile-random",REDIS_MAXMEMORY_VOLATILE_RANDOM},
+ {"volatile-ttl",REDIS_MAXMEMORY_VOLATILE_TTL},
+ {"allkeys-lru",REDIS_MAXMEMORY_ALLKEYS_LRU},
+ {"allkeys-random",REDIS_MAXMEMORY_ALLKEYS_RANDOM},
+ {"noeviction",REDIS_MAXMEMORY_NO_EVICTION},
+ {NULL, 0}
+};
+
+configEnum syslog_facility_enum[] = {
{"user", LOG_USER},
{"local0", LOG_LOCAL0},
{"local1", LOG_LOCAL1},
@@ -50,6 +66,30 @@ static struct {
{NULL, 0}
};
+configEnum loglevel_enum[] = {
+ {"debug", REDIS_DEBUG},
+ {"verbose", REDIS_VERBOSE},
+ {"notice", REDIS_NOTICE},
+ {"warning", REDIS_WARNING},
+ {NULL,0}
+};
+
+configEnum supervised_mode_enum[] = {
+ {"upstart", REDIS_SUPERVISED_UPSTART},
+ {"systemd", REDIS_SUPERVISED_SYSTEMD},
+ {"auto", REDIS_SUPERVISED_AUTODETECT},
+ {"no", REDIS_SUPERVISED_NONE},
+ {NULL, 0}
+};
+
+configEnum aof_fsync_enum[] = {
+ {"everysec", AOF_FSYNC_EVERYSEC},
+ {"always", AOF_FSYNC_ALWAYS},
+ {"no", AOF_FSYNC_NO},
+ {NULL, 0}
+};
+
+/* Output buffer limits presets. */
clientBufferLimitsConfig clientBufferLimitsDefaults[REDIS_CLIENT_TYPE_COUNT] = {
{0, 0, 0}, /* normal */
{1024*1024*256, 1024*1024*64, 60}, /* slave */
@@ -57,10 +97,42 @@ clientBufferLimitsConfig clientBufferLimitsDefaults[REDIS_CLIENT_TYPE_COUNT] = {
};
/*-----------------------------------------------------------------------------
- * Config file parsing
+ * Enum access functions
*----------------------------------------------------------------------------*/
-int supervisedToMode(const char *str);
+/* Get enum value from name. If there is no match INT_MIN is returned. */
+int configEnumGetValue(configEnum *ce, char *name) {
+ while(ce->name != NULL) {
+ if (!strcasecmp(ce->name,name)) return ce->val;
+ ce++;
+ }
+ return INT_MIN;
+}
+
+/* Get enum name from value. If no match is found NULL is returned. */
+const char *configEnumGetName(configEnum *ce, int val) {
+ while(ce->name != NULL) {
+ if (ce->val == val) return ce->name;
+ ce++;
+ }
+ return NULL;
+}
+
+/* Wrapper for configEnumGetName() returning "unknown" insetad of NULL if
+ * there is no match. */
+const char *configEnumGetNameOrUnknown(configEnum *ce, int val) {
+ const char *name = configEnumGetName(ce,val);
+ return name ? name : "unknown";
+}
+
+/* Used for INFO generation. */
+const char *maxmemoryToString(void) {
+ return configEnumGetNameOrUnknown(maxmemory_policy_enum,server.maxmemory);
+}
+
+/*-----------------------------------------------------------------------------
+ * Config file parsing
+ *----------------------------------------------------------------------------*/
int yesnotoi(char *s) {
if (!strcasecmp(s,"yes")) return 1;
@@ -169,12 +241,10 @@ void loadServerConfigFromString(char *config) {
exit(1);
}
} else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
- if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
- else if (!strcasecmp(argv[1],"verbose")) server.verbosity = REDIS_VERBOSE;
- else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
- else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
- else {
- err = "Invalid log level. Must be one of debug, notice, warning";
+ server.verbosity = configEnumGetValue(loglevel_enum,argv[1]);
+ if (server.verbosity == INT_MIN) {
+ err = "Invalid log level. "
+ "Must be one of debug, verbose, notice, warning";
goto loaderr;
}
} else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
@@ -201,16 +271,9 @@ void loadServerConfigFromString(char *config) {
if (server.syslog_ident) zfree(server.syslog_ident);
server.syslog_ident = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"syslog-facility") && argc == 2) {
- int i;
-
- for (i = 0; validSyslogFacilities[i].name; i++) {
- if (!strcasecmp(validSyslogFacilities[i].name, argv[1])) {
- server.syslog_facility = validSyslogFacilities[i].value;
- break;
- }
- }
-
- if (!validSyslogFacilities[i].name) {
+ server.syslog_facility =
+ configEnumGetValue(syslog_facility_enum,argv[1]);
+ if (server.syslog_facility == INT_MIN) {
err = "Invalid log facility. Must be one of USER or between LOCAL0-LOCAL7";
goto loaderr;
}
@@ -229,19 +292,9 @@ void loadServerConfigFromString(char *config) {
} else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
server.maxmemory = memtoll(argv[1],NULL);
} else if (!strcasecmp(argv[0],"maxmemory-policy") && argc == 2) {
- if (!strcasecmp(argv[1],"volatile-lru")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU;
- } else if (!strcasecmp(argv[1],"volatile-random")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_RANDOM;
- } else if (!strcasecmp(argv[1],"volatile-ttl")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_TTL;
- } else if (!strcasecmp(argv[1],"allkeys-lru")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_LRU;
- } else if (!strcasecmp(argv[1],"allkeys-random")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_RANDOM;
- } else if (!strcasecmp(argv[1],"noeviction")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_NO_EVICTION;
- } else {
+ server.maxmemory_policy =
+ configEnumGetValue(maxmemory_policy_enum,argv[1]);
+ if (server.maxmemory_policy == INT_MIN) {
err = "Invalid maxmemory policy";
goto loaderr;
}
@@ -345,13 +398,8 @@ void loadServerConfigFromString(char *config) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
- if (!strcasecmp(argv[1],"no")) {
- server.aof_fsync = AOF_FSYNC_NO;
- } else if (!strcasecmp(argv[1],"always")) {
- server.aof_fsync = AOF_FSYNC_ALWAYS;
- } else if (!strcasecmp(argv[1],"everysec")) {
- server.aof_fsync = AOF_FSYNC_EVERYSEC;
- } else {
+ server.aof_fsync = configEnumGetValue(aof_fsync_enum,argv[1]);
+ if (server.aof_fsync == INT_MIN) {
err = "argument must be 'no', 'always' or 'everysec'";
goto loaderr;
}
@@ -536,14 +584,14 @@ void loadServerConfigFromString(char *config) {
}
server.notify_keyspace_events = flags;
} else if (!strcasecmp(argv[0],"supervised") && argc == 2) {
- int mode = supervisedToMode(argv[1]);
+ server.supervised_mode =
+ configEnumGetValue(supervised_mode_enum,argv[1]);
- if (mode == -1) {
+ if (server.supervised_mode == INT_MIN) {
err = "Invalid option for 'supervised'. "
"Allowed values: 'upstart', 'systemd', 'auto', or 'no'";
goto loaderr;
}
- server.supervised_mode = mode;
} else if (!strcasecmp(argv[0],"sentinel")) {
/* argc == 1 is handled by main() as we need to enter the sentinel
* mode ASAP. */
@@ -621,6 +669,36 @@ void loadServerConfig(char *filename, char *options) {
* CONFIG SET implementation
*----------------------------------------------------------------------------*/
+#define config_set_bool_field(_name,_var) \
+ } else if (!strcasecmp(c->argv[2]->ptr,_name)) { \
+ int yn = yesnotoi(o->ptr); \
+ if (yn == -1) goto badfmt; \
+ _var = yn;
+
+#define config_set_numerical_field(_name,_var,min,max) \
+ } else if (!strcasecmp(c->argv[2]->ptr,_name)) { \
+ if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt; \
+ if (min != LLONG_MIN && ll < min) goto badfmt; \
+ if (max != LLONG_MAX && ll > max) goto badfmt; \
+ _var = ll;
+
+#define config_set_memory_field(_name,_var) \
+ } else if (!strcasecmp(c->argv[2]->ptr,_name)) { \
+ ll = memtoll(o->ptr,&err); \
+ if (err || ll < 0) goto badfmt; \
+ _var = ll;
+
+#define config_set_enum_field(_name,_var,_enumvar) \
+ } else if (!strcasecmp(c->argv[2]->ptr,_name)) { \
+ int enumval = configEnumGetValue(_enumvar,o->ptr); \
+ if (enumval == INT_MIN) goto badfmt; \
+ _var = enumval;
+
+#define config_set_special_field(_name) \
+ } else if (!strcasecmp(c->argv[2]->ptr,_name)) {
+
+#define config_set_else } else
+
void configSetCommand(redisClient *c) {
robj *o;
long long ll;
@@ -629,31 +707,24 @@ void configSetCommand(redisClient *c) {
redisAssertWithInfo(c,c->argv[3],sdsEncodedObject(c->argv[3]));
o = c->argv[3];
- if (!strcasecmp(c->argv[2]->ptr,"dbfilename")) {
+ if (0) { /* this starts the config_set macros else-if chain. */
+
+ /* Special fields that can't be handled with general macros. */
+ config_set_special_field("dbfilename") {
if (!pathIsBaseName(o->ptr)) {
addReplyError(c, "dbfilename can't be a path, just a filename");
return;
}
zfree(server.rdb_filename);
server.rdb_filename = zstrdup(o->ptr);
- } else if (!strcasecmp(c->argv[2]->ptr,"requirepass")) {
+ } config_set_special_field("requirepass") {
if (sdslen(o->ptr) > REDIS_AUTHPASS_MAX_LEN) goto badfmt;
zfree(server.requirepass);
server.requirepass = ((char*)o->ptr)[0] ? zstrdup(o->ptr) : NULL;
- } else if (!strcasecmp(c->argv[2]->ptr,"masterauth")) {
+ } config_set_special_field("masterauth") {
zfree(server.masterauth);
server.masterauth = ((char*)o->ptr)[0] ? zstrdup(o->ptr) : NULL;
- } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory")) {
- ll = memtoll(o->ptr,&err);
- if (err || ll < 0) goto badfmt;
- server.maxmemory = ll;
- if (server.maxmemory) {
- if (server.maxmemory < zmalloc_used_memory()) {
- redisLog(REDIS_WARNING,"WARNING: the new maxmemory value set via CONFIG SET is smaller than the current memory usage. This will result in keys eviction and/or inability to accept new write commands depending on the maxmemory-policy.");
- }
- freeMemoryIfNeeded();
- }
- } else if (!strcasecmp(c->argv[2]->ptr,"maxclients")) {
+ } config_set_special_field("maxclients") {
int orig_value = server.maxclients;
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 1) goto badfmt;
@@ -679,55 +750,7 @@ void configSetCommand(redisClient *c) {
}
}
}
- } else if (!strcasecmp(c->argv[2]->ptr,"hz")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.hz = ll;
- if (server.hz < REDIS_MIN_HZ) server.hz = REDIS_MIN_HZ;
- if (server.hz > REDIS_MAX_HZ) server.hz = REDIS_MAX_HZ;
- } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory-policy")) {
- if (!strcasecmp(o->ptr,"volatile-lru")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU;
- } else if (!strcasecmp(o->ptr,"volatile-random")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_RANDOM;
- } else if (!strcasecmp(o->ptr,"volatile-ttl")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_TTL;
- } else if (!strcasecmp(o->ptr,"allkeys-lru")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_LRU;
- } else if (!strcasecmp(o->ptr,"allkeys-random")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_RANDOM;
- } else if (!strcasecmp(o->ptr,"noeviction")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_NO_EVICTION;
- } else {
- goto badfmt;
- }
- } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory-samples")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll <= 0) goto badfmt;
- server.maxmemory_samples = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"timeout")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0 || ll > LONG_MAX) goto badfmt;
- server.maxidletime = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"tcp-keepalive")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0 || ll > INT_MAX) goto badfmt;
- server.tcpkeepalive = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"appendfsync")) {
- if (!strcasecmp(o->ptr,"no")) {
- server.aof_fsync = AOF_FSYNC_NO;
- } else if (!strcasecmp(o->ptr,"everysec")) {
- server.aof_fsync = AOF_FSYNC_EVERYSEC;
- } else if (!strcasecmp(o->ptr,"always")) {
- server.aof_fsync = AOF_FSYNC_ALWAYS;
- } else {
- goto badfmt;
- }
- } else if (!strcasecmp(c->argv[2]->ptr,"no-appendfsync-on-rewrite")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.aof_no_fsync_on_rewrite = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"appendonly")) {
+ } config_set_special_field("appendonly") {
int enable = yesnotoi(o->ptr);
if (enable == -1) goto badfmt;
@@ -740,23 +763,7 @@ void configSetCommand(redisClient *c) {
return;
}
}
- } else if (!strcasecmp(c->argv[2]->ptr,"auto-aof-rewrite-percentage")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.aof_rewrite_perc = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"auto-aof-rewrite-min-size")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.aof_rewrite_min_size = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"aof-rewrite-incremental-fsync")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.aof_rewrite_incremental_fsync = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"aof-load-truncated")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.aof_load_truncated = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"save")) {
+ } config_set_special_field("save") {
int vlen, j;
sds *v = sdssplitlen(o->ptr,sdslen(o->ptr)," ",1,&vlen);
@@ -790,70 +797,12 @@ void configSetCommand(redisClient *c) {
appendServerSaveParams(seconds, changes);
}
sdsfreesplitres(v,vlen);
- } else if (!strcasecmp(c->argv[2]->ptr,"slave-serve-stale-data")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.repl_serve_stale_data = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"slave-read-only")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.repl_slave_ro = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"dir")) {
+ } config_set_special_field("dir") {
if (chdir((char*)o->ptr) == -1) {
addReplyErrorFormat(c,"Changing directory: %s", strerror(errno));
return;
}
- } else if (!strcasecmp(c->argv[2]->ptr,"hash-max-ziplist-entries")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.hash_max_ziplist_entries = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"hash-max-ziplist-value")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.hash_max_ziplist_value = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"list-max-ziplist-size")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.list_max_ziplist_size = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"list-compress-depth")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.list_compress_depth = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"set-max-intset-entries")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.set_max_intset_entries = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"zset-max-ziplist-entries")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.zset_max_ziplist_entries = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"zset-max-ziplist-value")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.zset_max_ziplist_value = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"hll-sparse-max-bytes")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.hll_sparse_max_bytes = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"lua-time-limit")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.lua_time_limit = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"slowlog-log-slower-than")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR) goto badfmt;
- server.slowlog_log_slower_than = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"slowlog-max-len")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.slowlog_max_len = (unsigned)ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"latency-monitor-threshold")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.latency_monitor_threshold = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"loglevel")) {
- if (!strcasecmp(o->ptr,"warning")) {
- server.verbosity = REDIS_WARNING;
- } else if (!strcasecmp(o->ptr,"notice")) {
- server.verbosity = REDIS_NOTICE;
- } else if (!strcasecmp(o->ptr,"verbose")) {
- server.verbosity = REDIS_VERBOSE;
- } else if (!strcasecmp(o->ptr,"debug")) {
- server.verbosity = REDIS_DEBUG;
- } else {
- goto badfmt;
- }
- } else if (!strcasecmp(c->argv[2]->ptr,"client-output-buffer-limit")) {
+ } config_set_special_field("client-output-buffer-limit") {
int vlen, j;
sds *v = sdssplitlen(o->ptr,sdslen(o->ptr)," ",1,&vlen);
@@ -898,90 +847,137 @@ void configSetCommand(redisClient *c) {
server.client_obuf_limits[class].soft_limit_seconds = soft_seconds;
}
sdsfreesplitres(v,vlen);
- } else if (!strcasecmp(c->argv[2]->ptr,"stop-writes-on-bgsave-error")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.stop_writes_on_bgsave_err = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-ping-slave-period")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt;
- server.repl_ping_slave_period = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-timeout")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt;
- server.repl_timeout = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-backlog-size")) {
- ll = memtoll(o->ptr,&err);
- if (err || ll < 0) goto badfmt;
- resizeReplicationBacklog(ll);
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-backlog-ttl")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.repl_backlog_time_limit = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"watchdog-period")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- if (ll)
- enableWatchdog(ll);
- else
- disableWatchdog();
- } else if (!strcasecmp(c->argv[2]->ptr,"rdbcompression")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.rdb_compression = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"notify-keyspace-events")) {
+ } config_set_special_field("notify-keyspace-events") {
int flags = keyspaceEventsStringToFlags(o->ptr);
if (flags == -1) goto badfmt;
server.notify_keyspace_events = flags;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-disable-tcp-nodelay")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.repl_disable_tcp_nodelay = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-diskless-sync")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.repl_diskless_sync = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-diskless-sync-delay")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.repl_diskless_sync_delay = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"slave-priority")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.slave_priority = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"min-slaves-to-write")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.repl_min_slaves_to_write = ll;
+
+ /* Boolean fields.
+ * config_set_bool_field(name,var). */
+ } config_set_bool_field(
+ "rdbcompression", server.rdb_compression) {
+ } config_set_bool_field(
+ "repl-disable-tcp-nodelay",server.repl_disable_tcp_nodelay) {
+ } config_set_bool_field(
+ "repl-diskless-sync",server.repl_diskless_sync) {
+ } config_set_bool_field(
+ "cluster-require-full-coverage",server.cluster_require_full_coverage) {
+ } config_set_bool_field(
+ "aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync) {
+ } config_set_bool_field(
+ "aof-load-truncated",server.aof_load_truncated) {
+ } config_set_bool_field(
+ "slave-serve-stale-data",server.repl_serve_stale_data) {
+ } config_set_bool_field(
+ "slave-read-only",server.repl_slave_ro) {
+ } config_set_bool_field(
+ "activerehashing",server.activerehashing) {
+ } config_set_bool_field(
+ "stop-writes-on-bgsave-error",server.stop_writes_on_bgsave_err) {
+
+ /* Numerical fields.
+ * config_set_numerical_field(name,var,min,max) */
+ } config_set_numerical_field(
+ "tcp-keepalive",server.tcpkeepalive,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "maxmemory-samples",server.maxmemory_samples,1,LLONG_MAX) {
+ } config_set_numerical_field(
+ "timeout",server.maxidletime,0,LONG_MAX) {
+ } config_set_numerical_field(
+ "auto-aof-rewrite-percentage",server.aof_rewrite_perc,0,LLONG_MAX){
+ } config_set_numerical_field(
+ "auto-aof-rewrite-min-size",server.aof_rewrite_min_size,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "hash-max-ziplist-entries",server.hash_max_ziplist_entries,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "hash-max-ziplist-value",server.hash_max_ziplist_value,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "list-max-ziplist-size",server.list_max_ziplist_size,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "list-compress-depth",server.list_compress_depth,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "set-max-intset-entries",server.set_max_intset_entries,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "zset-max-ziplist-entries",server.zset_max_ziplist_entries,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "zset-max-ziplist-value",server.zset_max_ziplist_value,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "hll-sparse-max-bytes",server.hll_sparse_max_bytes,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "lua-time-limit",server.lua_time_limit,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "slowlog-log-slower-than",server.slowlog_log_slower_than,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "slowlog-max-len",ll,0,LLONG_MAX) {
+ /* Cast to unsigned. */
+ server.slowlog_max_len = (unsigned)ll;
+ } config_set_numerical_field(
+ "latency-monitor-threshold",server.latency_monitor_threshold,0,LLONG_MAX){
+ } config_set_numerical_field(
+ "repl-ping-slave-period",server.repl_ping_slave_period,1,LLONG_MAX) {
+ } config_set_numerical_field(
+ "repl-timeout",server.repl_timeout,1,LLONG_MAX) {
+ } config_set_numerical_field(
+ "repl-backlog-ttl",server.repl_backlog_time_limit,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "repl-diskless-sync-delay",server.repl_diskless_sync_delay,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "slave-priority",server.slave_priority,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "min-slaves-to-write",server.repl_min_slaves_to_write,0,LLONG_MAX) {
refreshGoodSlavesCount();
- } else if (!strcasecmp(c->argv[2]->ptr,"min-slaves-max-lag")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.repl_min_slaves_max_lag = ll;
+ } config_set_numerical_field(
+ "min-slaves-max-lag",server.repl_min_slaves_max_lag,0,LLONG_MAX) {
refreshGoodSlavesCount();
- } else if (!strcasecmp(c->argv[2]->ptr,"cluster-require-full-coverage")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.cluster_require_full_coverage = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"cluster-node-timeout")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll <= 0) goto badfmt;
- server.cluster_node_timeout = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"cluster-migration-barrier")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.cluster_migration_barrier = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"cluster-slave-validity-factor")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.cluster_slave_validity_factor = ll;
- } else {
+ } config_set_numerical_field(
+ "cluster-node-timeout",server.cluster_node_timeout,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "cluster-migration-barrier",server.cluster_migration_barrier,0,LLONG_MAX){
+ } config_set_numerical_field(
+ "cluster-slave-validity-factor",server.cluster_slave_validity_factor,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "hz",server.hz,0,LLONG_MAX) {
+ /* Hz is more an hint from the user, so we accept values out of range
+ * but cap them to reasonable values. */
+ if (server.hz < REDIS_MIN_HZ) server.hz = REDIS_MIN_HZ;
+ if (server.hz > REDIS_MAX_HZ) server.hz = REDIS_MAX_HZ;
+ } config_set_numerical_field(
+ "watchdog-period",ll,0,LLONG_MAX) {
+ if (ll)
+ enableWatchdog(ll);
+ else
+ disableWatchdog();
+
+ /* Memory fields.
+ * config_set_memory_field(name,var) */
+ } config_set_memory_field("maxmemory",server.maxmemory) {
+ if (server.maxmemory) {
+ if (server.maxmemory < zmalloc_used_memory()) {
+ redisLog(REDIS_WARNING,"WARNING: the new maxmemory value set via CONFIG SET is smaller than the current memory usage. This will result in keys eviction and/or inability to accept new write commands depending on the maxmemory-policy.");
+ }
+ freeMemoryIfNeeded();
+ }
+ } config_set_memory_field("repl-backlog-size",ll) {
+ resizeReplicationBacklog(ll);
+
+ /* Enumeration fields.
+ * config_set_enum_field(name,var,enum_var) */
+ } config_set_enum_field(
+ "loglevel",server.verbosity,loglevel_enum) {
+ } config_set_enum_field(
+ "maxmemory-policy",server.maxmemory_policy,maxmemory_policy_enum) {
+ } config_set_enum_field(
+ "appendfsync",server.aof_fsync,aof_fsync_enum) {
+
+ /* Everyhing else is an error... */
+ } config_set_else {
addReplyErrorFormat(c,"Unsupported CONFIG parameter: %s",
(char*)c->argv[2]->ptr);
return;
}
+
+ /* On success we just return a generic OK for all the options. */
addReply(c,shared.ok);
return;
@@ -1020,47 +1016,14 @@ badfmt: /* Bad format errors */
} \
} while(0);
-char *maxmemoryToString() {
- char *s;
- switch(server.maxmemory_policy) {
- case REDIS_MAXMEMORY_VOLATILE_LRU: s = "volatile-lru"; break;
- case REDIS_MAXMEMORY_VOLATILE_TTL: s = "volatile-ttl"; break;
- case REDIS_MAXMEMORY_VOLATILE_RANDOM: s = "volatile-random"; break;
- case REDIS_MAXMEMORY_ALLKEYS_LRU: s = "allkeys-lru"; break;
- case REDIS_MAXMEMORY_ALLKEYS_RANDOM: s = "allkeys-random"; break;
- case REDIS_MAXMEMORY_NO_EVICTION: s = "noeviction"; break;
- default: s = "unknown"; break;
- }
- return s;
-}
-
-int supervisedToMode(const char *str) {
- int mode;
- if (!strcasecmp(str,"upstart")) {
- mode = REDIS_SUPERVISED_UPSTART;
- } else if (!strcasecmp(str,"systemd")) {
- mode = REDIS_SUPERVISED_SYSTEMD;
- } else if (!strcasecmp(str,"auto")) {
- mode = REDIS_SUPERVISED_AUTODETECT;
- } else if (!strcasecmp(str,"no")) {
- mode = REDIS_SUPERVISED_NONE;
- } else {
- mode = -1;
- }
- return mode;
-}
+#define config_get_enum_field(_name,_var,_enumvar) do { \
+ if (stringmatch(pattern,_name,0)) { \
+ addReplyBulkCString(c,_name); \
+ addReplyBulkCString(c,configEnumGetNameOrUnknown(_enumvar,_var)); \
+ matches++; \
+ } \
+} while(0);
-char *supervisedToString(void) {
- char *s;
- switch(server.supervised_mode) {
- case REDIS_SUPERVISED_UPSTART: s = "upstart"; break;
- case REDIS_SUPERVISED_SYSTEMD: s = "systemd"; break;
- case REDIS_SUPERVISED_AUTODETECT: s = "auto"; break;
- case REDIS_SUPERVISED_NONE: s = "no"; break;
- default: s = "no"; break;
- }
- return s;
-}
void configGetCommand(redisClient *c) {
robj *o = c->argv[2];
void *replylen = addDeferredMultiBulkLength(c);
@@ -1081,7 +1044,6 @@ void configGetCommand(redisClient *c) {
config_get_numerical_field("maxmemory",server.maxmemory);
config_get_numerical_field("maxmemory-samples",server.maxmemory_samples);
config_get_numerical_field("timeout",server.maxidletime);
- config_get_numerical_field("tcp-keepalive",server.tcpkeepalive);
config_get_numerical_field("auto-aof-rewrite-percentage",
server.aof_rewrite_perc);
config_get_numerical_field("auto-aof-rewrite-min-size",
@@ -1126,6 +1088,7 @@ void configGetCommand(redisClient *c) {
config_get_numerical_field("cluster-migration-barrier",server.cluster_migration_barrier);
config_get_numerical_field("cluster-slave-validity-factor",server.cluster_slave_validity_factor);
config_get_numerical_field("repl-diskless-sync-delay",server.repl_diskless_sync_delay);
+ config_get_numerical_field("tcp-keepalive",server.tcpkeepalive);
/* Bool (yes/no) values */
config_get_bool_field("cluster-require-full-coverage",
@@ -1151,6 +1114,18 @@ void configGetCommand(redisClient *c) {
config_get_bool_field("aof-load-truncated",
server.aof_load_truncated);
+ /* Enum values */
+ config_get_enum_field("maxmemory-policy",
+ server.maxmemory_policy,maxmemory_policy_enum);
+ config_get_enum_field("loglevel",
+ server.verbosity,loglevel_enum);
+ config_get_enum_field("supervised",
+ server.supervised_mode,supervised_mode_enum);
+ config_get_enum_field("appendfsync",
+ server.aof_fsync,aof_fsync_enum);
+ config_get_enum_field("syslog-facility",
+ server.syslog_facility,syslog_facility_enum);
+
/* Everything we can't handle with macros follows. */
if (stringmatch(pattern,"appendonly",0)) {
@@ -1168,24 +1143,6 @@ void configGetCommand(redisClient *c) {
addReplyBulkCString(c,buf);
matches++;
}
- if (stringmatch(pattern,"maxmemory-policy",0)) {
- addReplyBulkCString(c,"maxmemory-policy");
- addReplyBulkCString(c,maxmemoryToString());
- matches++;
- }
- if (stringmatch(pattern,"appendfsync",0)) {
- char *policy;
-
- switch(server.aof_fsync) {
- case AOF_FSYNC_NO: policy = "no"; break;
- case AOF_FSYNC_EVERYSEC: policy = "everysec"; break;
- case AOF_FSYNC_ALWAYS: policy = "always"; break;
- default: policy = "unknown"; break; /* too harmless to panic */
- }
- addReplyBulkCString(c,"appendfsync");
- addReplyBulkCString(c,policy);
- matches++;
- }
if (stringmatch(pattern,"save",0)) {
sds buf = sdsempty();
int j;
@@ -1202,25 +1159,6 @@ void configGetCommand(redisClient *c) {
sdsfree(buf);
matches++;
}
- if (stringmatch(pattern,"loglevel",0)) {
- char *s;
-
- switch(server.verbosity) {
- case REDIS_WARNING: s = "warning"; break;
- case REDIS_VERBOSE: s = "verbose"; break;
- case REDIS_NOTICE: s = "notice"; break;
- case REDIS_DEBUG: s = "debug"; break;
- default: s = "unknown"; break; /* too harmless to panic */
- }
- addReplyBulkCString(c,"loglevel");
- addReplyBulkCString(c,s);
- matches++;
- }
- if (stringmatch(pattern,"supervised",0)) {
- addReplyBulkCString(c,"supervised");
- addReplyBulkCString(c,supervisedToString());
- matches++;
- }
if (stringmatch(pattern,"client-output-buffer-limit",0)) {
sds buf = sdsempty();
int j;
@@ -1345,7 +1283,7 @@ void rewriteConfigAddLineNumberToOption(struct rewriteConfigState *state, sds op
* This is useful as only unused lines of processed options will be blanked
* in the config file, while options the rewrite process does not understand
* remain untouched. */
-void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, char *option) {
+void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, const char *option) {
sds opt = sdsnew(option);
if (dictAdd(state->rewritten,opt,NULL) != DICT_OK) sdsfree(opt);
@@ -1429,7 +1367,7 @@ struct rewriteConfigState *rewriteConfigReadOldFile(char *path) {
*
* "line" is either used, or freed, so the caller does not need to free it
* in any way. */
-void rewriteConfigRewriteLine(struct rewriteConfigState *state, char *option, sds line, int force) {
+void rewriteConfigRewriteLine(struct rewriteConfigState *state, const char *option, sds line, int force) {
sds o = sdsnew(option);
list *l = dictFetchValue(state->option_to_line,o);
@@ -1540,45 +1478,26 @@ void rewriteConfigOctalOption(struct rewriteConfigState *state, char *option, in
rewriteConfigRewriteLine(state,option,line,force);
}
-/* Rewrite an enumeration option, after the "value" every enum/value pair
- * is specified, terminated by NULL. After NULL the default value is
- * specified. See how the function is used for more information. */
-void rewriteConfigEnumOption(struct rewriteConfigState *state, char *option, int value, ...) {
- va_list ap;
- char *enum_name, *matching_name = NULL;
- int enum_val, def_val, force;
+/* Rewrite an enumeration option. It takes as usually state and option name,
+ * and in addition the enumeration array and the default value for the
+ * option. */
+void rewriteConfigEnumOption(struct rewriteConfigState *state, char *option, int value, configEnum *ce, int defval) {
sds line;
+ const char *name = configEnumGetNameOrUnknown(ce,value);
+ int force = value != defval;
- va_start(ap, value);
- while(1) {
- enum_name = va_arg(ap,char*);
- enum_val = va_arg(ap,int);
- if (enum_name == NULL) {
- def_val = enum_val;
- break;
- }
- if (value == enum_val) matching_name = enum_name;
- }
- va_end(ap);
-
- force = value != def_val;
- line = sdscatprintf(sdsempty(),"%s %s",option,matching_name);
+ line = sdscatprintf(sdsempty(),"%s %s",option,name);
rewriteConfigRewriteLine(state,option,line,force);
}
/* Rewrite the syslog-facility option. */
void rewriteConfigSyslogfacilityOption(struct rewriteConfigState *state) {
- int value = server.syslog_facility, j;
+ int value = server.syslog_facility;
int force = value != LOG_LOCAL0;
- char *name = NULL, *option = "syslog-facility";
+ const char *name = NULL, *option = "syslog-facility";
sds line;
- for (j = 0; validSyslogFacilities[j].name; j++) {
- if (validSyslogFacilities[j].value == value) {
- name = (char*) validSyslogFacilities[j].name;
- break;
- }
- }
+ name = configEnumGetNameOrUnknown(syslog_facility_enum,value);
line = sdscatprintf(sdsempty(),"%s %s",option,name);
rewriteConfigRewriteLine(state,option,line,force);
}
@@ -1839,12 +1758,7 @@ int rewriteConfig(char *path) {
rewriteConfigOctalOption(state,"unixsocketperm",server.unixsocketperm,REDIS_DEFAULT_UNIX_SOCKET_PERM);
rewriteConfigNumericalOption(state,"timeout",server.maxidletime,REDIS_MAXIDLETIME);
rewriteConfigNumericalOption(state,"tcp-keepalive",server.tcpkeepalive,REDIS_DEFAULT_TCP_KEEPALIVE);
- rewriteConfigEnumOption(state,"loglevel",server.verbosity,
- "debug", REDIS_DEBUG,
- "verbose", REDIS_VERBOSE,
- "notice", REDIS_NOTICE,
- "warning", REDIS_WARNING,
- NULL, REDIS_DEFAULT_VERBOSITY);
+ rewriteConfigEnumOption(state,"loglevel",server.verbosity,loglevel_enum,REDIS_DEFAULT_VERBOSITY);
rewriteConfigStringOption(state,"logfile",server.logfile,REDIS_DEFAULT_LOGFILE);
rewriteConfigYesNoOption(state,"syslog-enabled",server.syslog_enabled,REDIS_DEFAULT_SYSLOG_ENABLED);
rewriteConfigStringOption(state,"syslog-ident",server.syslog_ident,REDIS_DEFAULT_SYSLOG_IDENT);
@@ -1873,22 +1787,11 @@ int rewriteConfig(char *path) {
rewriteConfigStringOption(state,"requirepass",server.requirepass,NULL);
rewriteConfigNumericalOption(state,"maxclients",server.maxclients,REDIS_MAX_CLIENTS);
rewriteConfigBytesOption(state,"maxmemory",server.maxmemory,REDIS_DEFAULT_MAXMEMORY);
- rewriteConfigEnumOption(state,"maxmemory-policy",server.maxmemory_policy,
- "volatile-lru", REDIS_MAXMEMORY_VOLATILE_LRU,
- "allkeys-lru", REDIS_MAXMEMORY_ALLKEYS_LRU,
- "volatile-random", REDIS_MAXMEMORY_VOLATILE_RANDOM,
- "allkeys-random", REDIS_MAXMEMORY_ALLKEYS_RANDOM,
- "volatile-ttl", REDIS_MAXMEMORY_VOLATILE_TTL,
- "noeviction", REDIS_MAXMEMORY_NO_EVICTION,
- NULL, REDIS_DEFAULT_MAXMEMORY_POLICY);
+ rewriteConfigEnumOption(state,"maxmemory-policy",server.maxmemory_policy,maxmemory_policy_enum,REDIS_DEFAULT_MAXMEMORY_POLICY);
rewriteConfigNumericalOption(state,"maxmemory-samples",server.maxmemory_samples,REDIS_DEFAULT_MAXMEMORY_SAMPLES);
rewriteConfigYesNoOption(state,"appendonly",server.aof_state != REDIS_AOF_OFF,0);
rewriteConfigStringOption(state,"appendfilename",server.aof_filename,REDIS_DEFAULT_AOF_FILENAME);
- rewriteConfigEnumOption(state,"appendfsync",server.aof_fsync,
- "everysec", AOF_FSYNC_EVERYSEC,
- "always", AOF_FSYNC_ALWAYS,
- "no", AOF_FSYNC_NO,
- NULL, REDIS_DEFAULT_AOF_FSYNC);
+ rewriteConfigEnumOption(state,"appendfsync",server.aof_fsync,aof_fsync_enum,REDIS_DEFAULT_AOF_FSYNC);
rewriteConfigYesNoOption(state,"no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite,REDIS_DEFAULT_AOF_NO_FSYNC_ON_REWRITE);
rewriteConfigNumericalOption(state,"auto-aof-rewrite-percentage",server.aof_rewrite_perc,REDIS_AOF_REWRITE_PERC);
rewriteConfigBytesOption(state,"auto-aof-rewrite-min-size",server.aof_rewrite_min_size,REDIS_AOF_REWRITE_MIN_SIZE);
@@ -1916,12 +1819,9 @@ int rewriteConfig(char *path) {
rewriteConfigNumericalOption(state,"hz",server.hz,REDIS_DEFAULT_HZ);
rewriteConfigYesNoOption(state,"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync,REDIS_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC);
rewriteConfigYesNoOption(state,"aof-load-truncated",server.aof_load_truncated,REDIS_DEFAULT_AOF_LOAD_TRUNCATED);
- rewriteConfigEnumOption(state,"supervised",server.supervised_mode,
- "upstart", REDIS_SUPERVISED_UPSTART,
- "systemd", REDIS_SUPERVISED_SYSTEMD,
- "auto", REDIS_SUPERVISED_AUTODETECT,
- "no", REDIS_SUPERVISED_NONE,
- NULL, REDIS_SUPERVISED_NONE);
+ rewriteConfigEnumOption(state,"supervised",server.supervised_mode,supervised_mode_enum,REDIS_SUPERVISED_NONE);
+
+ /* Rewrite Sentinel config if in Sentinel mode. */
if (server.sentinel_mode) rewriteConfigSentinelOption(state);
/* Step 3: remove all the orphaned lines in the old file, that is, lines
diff --git a/src/db.c b/src/db.c
index 36650318a..1493f0a20 100644
--- a/src/db.c
+++ b/src/db.c
@@ -318,13 +318,17 @@ void delCommand(redisClient *c) {
addReplyLongLong(c,deleted);
}
+/* EXISTS key1 key2 ... key_N.
+ * Return value is the number of keys existing. */
void existsCommand(redisClient *c) {
- expireIfNeeded(c->db,c->argv[1]);
- if (dbExists(c->db,c->argv[1])) {
- addReply(c, shared.cone);
- } else {
- addReply(c, shared.czero);
+ long long count = 0;
+ int j;
+
+ for (j = 1; j < c->argc; j++) {
+ expireIfNeeded(c->db,c->argv[j]);
+ if (dbExists(c->db,c->argv[j])) count++;
}
+ addReplyLongLong(c,count);
}
void selectCommand(redisClient *c) {
diff --git a/src/debug.c b/src/debug.c
index b8dcf648e..2acba1495 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -425,6 +425,27 @@ void debugCommand(redisClient *c) {
sizes = sdscatprintf(sizes,"dictentry:%d ", (int)sizeof(dictEntry));
sizes = sdscatprintf(sizes,"sdshdr:%d", (int)sizeof(struct sdshdr));
addReplyBulkSds(c,sizes);
+ } else if (!strcasecmp(c->argv[1]->ptr,"htstats") && c->argc == 3) {
+ long dbid;
+ sds stats = sdsempty();
+ char buf[4096];
+
+ if (getLongFromObjectOrReply(c, c->argv[2], &dbid, NULL) != REDIS_OK)
+ return;
+ if (dbid < 0 || dbid >= server.dbnum) {
+ addReplyError(c,"Out of range database");
+ return;
+ }
+
+ stats = sdscatprintf(stats,"[Dictionary HT]\n");
+ dictGetStats(buf,sizeof(buf),server.db[dbid].dict);
+ stats = sdscat(stats,buf);
+
+ stats = sdscatprintf(stats,"[Expires HT]\n");
+ dictGetStats(buf,sizeof(buf),server.db[dbid].expires);
+ stats = sdscat(stats,buf);
+
+ addReplyBulkSds(c,stats);
} else if (!strcasecmp(c->argv[1]->ptr,"jemalloc") && c->argc == 3) {
#if defined(USE_JEMALLOC)
if (!strcasecmp(c->argv[2]->ptr, "info")) {
diff --git a/src/dict.c b/src/dict.c
index dbcfeb492..068262757 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -687,10 +687,10 @@ dictEntry *dictGetRandomKey(dict *d)
* statistics. However the function is much faster than dictGetRandomKey()
* at producing N elements. */
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
- unsigned int j; /* internal hash table id, 0 or 1. */
- unsigned int tables; /* 1 or 2 tables? */
- unsigned int stored = 0, maxsizemask;
- unsigned int maxsteps;
+ unsigned long j; /* internal hash table id, 0 or 1. */
+ unsigned long tables; /* 1 or 2 tables? */
+ unsigned long stored = 0, maxsizemask;
+ unsigned long maxsteps;
if (dictSize(d) < count) count = dictSize(d);
maxsteps = count*10;
@@ -709,14 +709,14 @@ unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
maxsizemask = d->ht[1].sizemask;
/* Pick a random point inside the larger table. */
- unsigned int i = random() & maxsizemask;
- unsigned int emptylen = 0; /* Continuous empty entries so far. */
+ unsigned long i = random() & maxsizemask;
+ unsigned long emptylen = 0; /* Continuous empty entries so far. */
while(stored < count && maxsteps--) {
for (j = 0; j < tables; j++) {
/* Invariant of the dict.c rehashing: up to the indexes already
* visited in ht[0] during the rehashing, there are no populated
* buckets, so we can skip ht[0] for indexes between 0 and idx-1. */
- if (tables == 2 && j == 0 && i < d->rehashidx) {
+ if (tables == 2 && j == 0 && i < (unsigned long) d->rehashidx) {
/* Moreover, if we are currently out of range in the second
* table, there will be no elements in both tables up to
* the current rehashing index, so we jump if possible.
@@ -1002,24 +1002,21 @@ void dictDisableResize(void) {
dict_can_resize = 0;
}
-#if 0
-
-/* The following is code that we don't use for Redis currently, but that is part
-of the library. */
-
-/* ----------------------- Debugging ------------------------*/
+/* ------------------------------- Debugging ---------------------------------*/
#define DICT_STATS_VECTLEN 50
-static void _dictPrintStatsHt(dictht *ht) {
+size_t _dictGetStatsHt(char *buf, size_t bufsize, dictht *ht, int tableid) {
unsigned long i, slots = 0, chainlen, maxchainlen = 0;
unsigned long totchainlen = 0;
unsigned long clvector[DICT_STATS_VECTLEN];
+ size_t l = 0;
if (ht->used == 0) {
- printf("No stats available for empty dictionaries\n");
- return;
+ return snprintf(buf,bufsize,
+ "No stats available for empty dictionaries\n");
}
+ /* Compute stats. */
for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;
for (i = 0; i < ht->size; i++) {
dictEntry *he;
@@ -1040,89 +1037,46 @@ static void _dictPrintStatsHt(dictht *ht) {
if (chainlen > maxchainlen) maxchainlen = chainlen;
totchainlen += chainlen;
}
- printf("Hash table stats:\n");
- printf(" table size: %ld\n", ht->size);
- printf(" number of elements: %ld\n", ht->used);
- printf(" different slots: %ld\n", slots);
- printf(" max chain length: %ld\n", maxchainlen);
- printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots);
- printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots);
- printf(" Chain length distribution:\n");
+
+ /* Generate human readable stats. */
+ l += snprintf(buf+l,bufsize-l,
+ "Hash table %d stats (%s):\n"
+ " table size: %ld\n"
+ " number of elements: %ld\n"
+ " different slots: %ld\n"
+ " max chain length: %ld\n"
+ " avg chain length (counted): %.02f\n"
+ " avg chain length (computed): %.02f\n"
+ " Chain length distribution:\n",
+ tableid, (tableid == 0) ? "main hash table" : "rehashing target",
+ ht->size, ht->used, slots, maxchainlen,
+ (float)totchainlen/slots, (float)ht->used/slots);
+
for (i = 0; i < DICT_STATS_VECTLEN-1; i++) {
if (clvector[i] == 0) continue;
- printf(" %s%ld: %ld (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100);
+ if (l >= bufsize) break;
+ l += snprintf(buf+l,bufsize-l,
+ " %s%ld: %ld (%.02f%%)\n",
+ (i == DICT_STATS_VECTLEN-1)?">= ":"",
+ i, clvector[i], ((float)clvector[i]/ht->size)*100);
}
-}
-
-void dictPrintStats(dict *d) {
- _dictPrintStatsHt(&d->ht[0]);
- if (dictIsRehashing(d)) {
- printf("-- Rehashing into ht[1]:\n");
- _dictPrintStatsHt(&d->ht[1]);
- }
-}
-
-/* ----------------------- StringCopy Hash Table Type ------------------------*/
-
-static unsigned int _dictStringCopyHTHashFunction(const void *key)
-{
- return dictGenHashFunction(key, strlen(key));
-}
-
-static void *_dictStringDup(void *privdata, const void *key)
-{
- int len = strlen(key);
- char *copy = zmalloc(len+1);
- DICT_NOTUSED(privdata);
- memcpy(copy, key, len);
- copy[len] = '\0';
- return copy;
+ /* Unlike snprintf(), teturn the number of characters actually written. */
+ if (bufsize) buf[bufsize-1] = '\0';
+ return strlen(buf);
}
-static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1,
- const void *key2)
-{
- DICT_NOTUSED(privdata);
+void dictGetStats(char *buf, size_t bufsize, dict *d) {
+ size_t l;
+ char *orig_buf = buf;
+ size_t orig_bufsize = bufsize;
- return strcmp(key1, key2) == 0;
-}
-
-static void _dictStringDestructor(void *privdata, void *key)
-{
- DICT_NOTUSED(privdata);
-
- zfree(key);
+ l = _dictGetStatsHt(buf,bufsize,&d->ht[0],0);
+ buf += l;
+ bufsize -= l;
+ if (dictIsRehashing(d) && bufsize > 0) {
+ _dictGetStatsHt(buf,bufsize,&d->ht[1],1);
+ }
+ /* Make sure there is a NULL term at the end. */
+ if (orig_bufsize) orig_buf[orig_bufsize-1] = '\0';
}
-
-dictType dictTypeHeapStringCopyKey = {
- _dictStringCopyHTHashFunction, /* hash function */
- _dictStringDup, /* key dup */
- NULL, /* val dup */
- _dictStringCopyHTKeyCompare, /* key compare */
- _dictStringDestructor, /* key destructor */
- NULL /* val destructor */
-};
-
-/* This is like StringCopy but does not auto-duplicate the key.
- * It's used for intepreter's shared strings. */
-dictType dictTypeHeapStrings = {
- _dictStringCopyHTHashFunction, /* hash function */
- NULL, /* key dup */
- NULL, /* val dup */
- _dictStringCopyHTKeyCompare, /* key compare */
- _dictStringDestructor, /* key destructor */
- NULL /* val destructor */
-};
-
-/* This is like StringCopy but also automatically handle dynamic
- * allocated C strings as values. */
-dictType dictTypeHeapStringCopyKeyValue = {
- _dictStringCopyHTHashFunction, /* hash function */
- _dictStringDup, /* key dup */
- _dictStringDup, /* val dup */
- _dictStringCopyHTKeyCompare, /* key compare */
- _dictStringDestructor, /* key destructor */
- _dictStringDestructor, /* val destructor */
-};
-#endif
diff --git a/src/dict.h b/src/dict.h
index 014d18212..e31daee2a 100644
--- a/src/dict.h
+++ b/src/dict.h
@@ -165,7 +165,7 @@ dictEntry *dictNext(dictIterator *iter);
void dictReleaseIterator(dictIterator *iter);
dictEntry *dictGetRandomKey(dict *d);
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
-void dictPrintStats(dict *d);
+void dictGetStats(char *buf, size_t bufsize, dict *d);
unsigned int dictGenHashFunction(const void *key, int len);
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len);
void dictEmpty(dict *d, void(callback)(void*));
diff --git a/src/geo.c b/src/geo.c
new file mode 100644
index 000000000..63500bed7
--- /dev/null
+++ b/src/geo.c
@@ -0,0 +1,706 @@
+/*
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "geo.h"
+#include "geohash_helper.h"
+
+/* Things exported from t_zset.c only for geo.c, since it is the only other
+ * part of Redis that requires close zset introspection. */
+unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec *range);
+int zslValueLteMax(double value, zrangespec *spec);
+
+/* ====================================================================
+ * This file implements the following commands:
+ *
+ * - geoadd - add coordinates for value to geoset
+ * - georadius - search radius by coordinates in geoset
+ * - georadiusbymember - search radius based on geoset member position
+ * ==================================================================== */
+
+/* ====================================================================
+ * geoArray implementation
+ * ==================================================================== */
+
+/* Create a new array of geoPoints. */
+geoArray *geoArrayCreate(void) {
+ geoArray *ga = zmalloc(sizeof(*ga));
+ /* It gets allocated on first geoArrayAppend() call. */
+ ga->array = NULL;
+ ga->buckets = 0;
+ ga->used = 0;
+ return ga;
+}
+
+/* Add a new entry and return its pointer so that the caller can populate
+ * it with data. */
+geoPoint *geoArrayAppend(geoArray *ga) {
+ if (ga->used == ga->buckets) {
+ ga->buckets = (ga->buckets == 0) ? 8 : ga->buckets*2;
+ ga->array = zrealloc(ga->array,sizeof(geoPoint)*ga->buckets);
+ }
+ geoPoint *gp = ga->array+ga->used;
+ ga->used++;
+ return gp;
+}
+
+/* Destroy a geoArray created with geoArrayCreate(). */
+void geoArrayFree(geoArray *ga) {
+ size_t i;
+ for (i = 0; i < ga->used; i++) sdsfree(ga->array[i].member);
+ zfree(ga->array);
+ zfree(ga);
+}
+
+/* ====================================================================
+ * Helpers
+ * ==================================================================== */
+int decodeGeohash(double bits, double *xy) {
+ GeoHashBits hash = { .bits = (uint64_t)bits, .step = GEO_STEP_MAX };
+ return geohashDecodeToLongLatWGS84(hash, xy);
+}
+
+/* Input Argument Helper */
+/* Take a pointer to the latitude arg then use the next arg for longitude.
+ * On parse error REDIS_ERR is returned, otherwise REDIS_OK. */
+int extractLongLatOrReply(redisClient *c, robj **argv,
+ double *xy) {
+ for (int i = 0; i < 2; i++) {
+ if (getDoubleFromObjectOrReply(c, argv[i], xy + i, NULL) !=
+ REDIS_OK) {
+ return REDIS_ERR;
+ }
+ if (xy[0] < GEO_LONG_MIN || xy[0] > GEO_LONG_MAX ||
+ xy[1] < GEO_LAT_MIN || xy[1] > GEO_LAT_MAX) {
+ addReplySds(c, sdscatprintf(sdsempty(),
+ "-ERR invalid longitude,latitude pair %f,%f\r\n",xy[0],xy[1]));
+ return REDIS_ERR;
+ }
+ }
+ return REDIS_OK;
+}
+
+/* Input Argument Helper */
+/* Decode lat/long from a zset member's score.
+ * Returns REDIS_OK on successful decoding, otherwise REDIS_ERR is returned. */
+int longLatFromMember(robj *zobj, robj *member, double *xy) {
+ double score = 0;
+
+ if (zsetScore(zobj, member, &score) == REDIS_ERR) return REDIS_ERR;
+ if (!decodeGeohash(score, xy)) return REDIS_ERR;
+ return REDIS_OK;
+}
+
+/* Check that the unit argument matches one of the known units, and returns
+ * the conversion factor to meters (you need to divide meters by the conversion
+ * factor to convert to the right unit).
+ *
+ * If the unit is not valid, an error is reported to the client, and a value
+ * less than zero is returned. */
+double extractUnitOrReply(redisClient *c, robj *unit) {
+ char *u = unit->ptr;
+
+ if (!strcmp(u, "m")) {
+ return 1;
+ } else if (!strcmp(u, "km")) {
+ return 1000;
+ } else if (!strcmp(u, "ft")) {
+ return 0.3048;
+ } else if (!strcmp(u, "mi")) {
+ return 1609.34;
+ } else {
+ addReplyError(c,
+ "unsupported unit provided. please use m, km, ft, mi");
+ return -1;
+ }
+}
+
+/* Input Argument Helper.
+ * Extract the dinstance from the specified two arguments starting at 'argv'
+ * that shouldbe in the form: <number> <unit> and return the dinstance in the
+ * specified unit on success. *conversino is populated with the coefficient
+ * to use in order to convert meters to the unit.
+ *
+ * On error a value less than zero is returned. */
+double extractDistanceOrReply(redisClient *c, robj **argv,
+ double *conversion) {
+ double distance;
+ if (getDoubleFromObjectOrReply(c, argv[0], &distance,
+ "need numeric radius") != REDIS_OK) {
+ return -1;
+ }
+
+ double to_meters = extractUnitOrReply(c,argv[1]);
+ if (to_meters < 0) return -1;
+
+ if (conversion) *conversion = to_meters;
+ return distance * to_meters;
+}
+
+/* The defailt addReplyDouble has too much accuracy. We use this
+ * for returning location distances. "5.2145 meters away" is nicer
+ * than "5.2144992818115 meters away." We provide 4 digits after the dot
+ * so that the returned value is decently accurate even when the unit is
+ * the kilometer. */
+void addReplyDoubleDistance(redisClient *c, double d) {
+ char dbuf[128];
+ int dlen = snprintf(dbuf, sizeof(dbuf), "%.4f", d);
+ addReplyBulkCBuffer(c, dbuf, dlen);
+}
+
+/* Helper function for geoGetPointsInRange(): given a sorted set score
+ * representing a point, and another point (the center of our search) and
+ * a radius, appends this entry as a geoPoint into the specified geoArray
+ * only if the point is within the search area.
+ *
+ * returns REDIS_OK if the point is included, or REIDS_ERR if it is outside. */
+int geoAppendIfWithinRadius(geoArray *ga, double lon, double lat, double radius, double score, sds member) {
+ double distance, xy[2];
+
+ if (!decodeGeohash(score,xy)) return REDIS_ERR; /* Can't decode. */
+ /* Note that geohashGetDistanceIfInRadiusWGS84() takes arguments in
+ * reverse order: longitude first, latitude later. */
+ if (!geohashGetDistanceIfInRadiusWGS84(lon,lat, xy[0], xy[1],
+ radius, &distance))
+ {
+ return REDIS_ERR;
+ }
+
+ /* Append the new element. */
+ geoPoint *gp = geoArrayAppend(ga);
+ gp->longitude = xy[0];
+ gp->latitude = xy[1];
+ gp->dist = distance;
+ gp->member = member;
+ gp->score = score;
+ return REDIS_OK;
+}
+
+/* Query a Redis sorted set to extract all the elements between 'min' and
+ * 'max', appending them into the array of geoPoint structures 'gparray'.
+ * The command returns the number of elements added to the array.
+ *
+ * Elements which are farest than 'radius' from the specified 'x' and 'y'
+ * coordinates are not included.
+ *
+ * The ability of this function to append to an existing set of points is
+ * important for good performances because querying by radius is performed
+ * using multiple queries to the sorted set, that we later need to sort
+ * via qsort. Similarly we need to be able to reject points outside the search
+ * radius area ASAP in order to allocate and process more points than needed. */
+int geoGetPointsInRange(robj *zobj, double min, double max, double lon, double lat, double radius, geoArray *ga) {
+ /* minex 0 = include min in range; maxex 1 = exclude max in range */
+ /* That's: min <= val < max */
+ zrangespec range = { .min = min, .max = max, .minex = 0, .maxex = 1 };
+ size_t origincount = ga->used;
+ sds member;
+
+ if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ unsigned char *zl = zobj->ptr;
+ unsigned char *eptr, *sptr;
+ unsigned char *vstr = NULL;
+ unsigned int vlen = 0;
+ long long vlong = 0;
+ double score = 0;
+
+ if ((eptr = zzlFirstInRange(zl, &range)) == NULL) {
+ /* Nothing exists starting at our min. No results. */
+ return 0;
+ }
+
+ sptr = ziplistNext(zl, eptr);
+ while (eptr) {
+ score = zzlGetScore(sptr);
+
+ /* If we fell out of range, break. */
+ if (!zslValueLteMax(score, &range))
+ break;
+
+ /* We know the element exists. ziplistGet should always succeed */
+ ziplistGet(eptr, &vstr, &vlen, &vlong);
+ member = (vstr == NULL) ? sdsfromlonglong(vlong) :
+ sdsnewlen(vstr,vlen);
+ if (geoAppendIfWithinRadius(ga,lon,lat,radius,score,member)
+ == REDIS_ERR) sdsfree(member);
+ zzlNext(zl, &eptr, &sptr);
+ }
+ } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ zset *zs = zobj->ptr;
+ zskiplist *zsl = zs->zsl;
+ zskiplistNode *ln;
+
+ if ((ln = zslFirstInRange(zsl, &range)) == NULL) {
+ /* Nothing exists starting at our min. No results. */
+ return 0;
+ }
+
+ while (ln) {
+ robj *o = ln->obj;
+ /* Abort when the node is no longer in range. */
+ if (!zslValueLteMax(ln->score, &range))
+ break;
+
+ member = (o->encoding == REDIS_ENCODING_INT) ?
+ sdsfromlonglong((long)o->ptr) :
+ sdsdup(o->ptr);
+ if (geoAppendIfWithinRadius(ga,lon,lat,radius,ln->score,member)
+ == REDIS_ERR) sdsfree(member);
+ ln = ln->level[0].forward;
+ }
+ }
+ return ga->used - origincount;
+}
+
+/* Compute the sorted set scores min (inclusive), max (exclusive) we should
+ * query in order to retrieve all the elements inside the specified area
+ * 'hash'. The two scores are returned by reference in *min and *max. */
+void scoresOfGeoHashBox(GeoHashBits hash, GeoHashFix52Bits *min, GeoHashFix52Bits *max) {
+ /* We want to compute the sorted set scores that will include all the
+ * elements inside the specified Geohash 'hash', which has as many
+ * bits as specified by hash.step * 2.
+ *
+ * So if step is, for example, 3, and the hash value in binary
+ * is 101010, since our score is 52 bits we want every element which
+ * is in binary: 101010?????????????????????????????????????????????
+ * Where ? can be 0 or 1.
+ *
+ * To get the min score we just use the initial hash value left
+ * shifted enough to get the 52 bit value. Later we increment the
+ * 6 bit prefis (see the hash.bits++ statement), and get the new
+ * prefix: 101011, which we align again to 52 bits to get the maximum
+ * value (which is excluded from the search). So we get everything
+ * between the two following scores (represented in binary):
+ *
+ * 1010100000000000000000000000000000000000000000000000 (included)
+ * and
+ * 1010110000000000000000000000000000000000000000000000 (excluded).
+ */
+ *min = geohashAlign52Bits(hash);
+ hash.bits++;
+ *max = geohashAlign52Bits(hash);
+}
+
+/* Obtain all members between the min/max of this geohash bounding box.
+ * Populate a geoArray of GeoPoints by calling geoGetPointsInRange().
+ * Return the number of points added to the array. */
+int membersOfGeoHashBox(robj *zobj, GeoHashBits hash, geoArray *ga, double lon, double lat, double radius) {
+ GeoHashFix52Bits min, max;
+
+ scoresOfGeoHashBox(hash,&min,&max);
+ return geoGetPointsInRange(zobj, min, max, lon, lat, radius, ga);
+}
+
+/* Search all eight neighbors + self geohash box */
+int membersOfAllNeighbors(robj *zobj, GeoHashRadius n, double lon, double lat, double radius, geoArray *ga) {
+ GeoHashBits neighbors[9];
+ unsigned int i, count = 0;
+
+ neighbors[0] = n.hash;
+ neighbors[1] = n.neighbors.north;
+ neighbors[2] = n.neighbors.south;
+ neighbors[3] = n.neighbors.east;
+ neighbors[4] = n.neighbors.west;
+ neighbors[5] = n.neighbors.north_east;
+ neighbors[6] = n.neighbors.north_west;
+ neighbors[7] = n.neighbors.south_east;
+ neighbors[8] = n.neighbors.south_west;
+
+ /* For each neighbor (*and* our own hashbox), get all the matching
+ * members and add them to the potential result list. */
+ for (i = 0; i < sizeof(neighbors) / sizeof(*neighbors); i++) {
+ if (HASHISZERO(neighbors[i]))
+ continue;
+ count += membersOfGeoHashBox(zobj, neighbors[i], ga, lon, lat, radius);
+ }
+ return count;
+}
+
+/* Sort comparators for qsort() */
+static int sort_gp_asc(const void *a, const void *b) {
+ const struct geoPoint *gpa = a, *gpb = b;
+ /* We can't do adist - bdist because they are doubles and
+ * the comparator returns an int. */
+ if (gpa->dist > gpb->dist)
+ return 1;
+ else if (gpa->dist == gpb->dist)
+ return 0;
+ else
+ return -1;
+}
+
+static int sort_gp_desc(const void *a, const void *b) {
+ return -sort_gp_asc(a, b);
+}
+
+/* ====================================================================
+ * Commands
+ * ==================================================================== */
+
+/* GEOADD key long lat name [long2 lat2 name2 ... longN latN nameN] */
+void geoaddCommand(redisClient *c) {
+ /* Check arguments number for sanity. */
+ if ((c->argc - 2) % 3 != 0) {
+ /* Need an odd number of arguments if we got this far... */
+ addReplyError(c, "syntax error. Try GEOADD key [x1] [y1] [name1] "
+ "[x2] [y2] [name2] ... ");
+ return;
+ }
+
+ int elements = (c->argc - 2) / 3;
+ int argc = 2+elements*2; /* ZADD key score ele ... */
+ robj **argv = zcalloc(argc*sizeof(robj*));
+ argv[0] = createRawStringObject("zadd",4);
+ argv[1] = c->argv[1]; /* key */
+ incrRefCount(argv[1]);
+
+ /* Create the argument vector to call ZADD in order to add all
+ * the score,value pairs to the requested zset, where score is actually
+ * an encoded version of lat,long. */
+ int i;
+ for (i = 0; i < elements; i++) {
+ double xy[2];
+
+ if (extractLongLatOrReply(c, (c->argv+2)+(i*3),xy) == REDIS_ERR) {
+ for (i = 0; i < argc; i++)
+ if (argv[i]) decrRefCount(argv[i]);
+ zfree(argv);
+ return;
+ }
+
+ /* Turn the coordinates into the score of the element. */
+ GeoHashBits hash;
+ geohashEncodeWGS84(xy[0], xy[1], GEO_STEP_MAX, &hash);
+ GeoHashFix52Bits bits = geohashAlign52Bits(hash);
+ robj *score = createObject(REDIS_STRING, sdsfromlonglong(bits));
+ robj *val = c->argv[2 + i * 3 + 2];
+ argv[2+i*2] = score;
+ argv[3+i*2] = val;
+ incrRefCount(val);
+ }
+
+ /* Finally call ZADD that will do the work for us. */
+ replaceClientCommandVector(c,argc,argv);
+ zaddCommand(c);
+}
+
+#define SORT_NONE 0
+#define SORT_ASC 1
+#define SORT_DESC 2
+
+#define RADIUS_COORDS 1
+#define RADIUS_MEMBER 2
+
+/* GEORADIUS key x y radius unit [WITHDIST] [WITHHASH] [WITHCOORD] [ASC|DESC]
+ * [COUNT count]
+ * GEORADIUSBYMEMBER key member radius unit ... options ... */
+void georadiusGeneric(redisClient *c, int type) {
+ robj *key = c->argv[1];
+
+ /* Look up the requested zset */
+ robj *zobj = NULL;
+ if ((zobj = lookupKeyReadOrReply(c, key, shared.emptymultibulk)) == NULL ||
+ checkType(c, zobj, REDIS_ZSET)) {
+ return;
+ }
+
+ /* Find long/lat to use for radius search based on inquiry type */
+ int base_args;
+ double xy[2] = { 0 };
+ if (type == RADIUS_COORDS) {
+ base_args = 6;
+ if (extractLongLatOrReply(c, c->argv + 2, xy) == REDIS_ERR)
+ return;
+ } else if (type == RADIUS_MEMBER) {
+ base_args = 5;
+ robj *member = c->argv[2];
+ if (longLatFromMember(zobj, member, xy) == REDIS_ERR) {
+ addReplyError(c, "could not decode requested zset member");
+ return;
+ }
+ } else {
+ addReplyError(c, "unknown georadius search type");
+ return;
+ }
+
+ /* Extract radius and units from arguments */
+ double radius_meters = 0, conversion = 1;
+ if ((radius_meters = extractDistanceOrReply(c, c->argv + base_args - 2,
+ &conversion)) < 0) {
+ return;
+ }
+
+ /* Discover and populate all optional parameters. */
+ int withdist = 0, withhash = 0, withcoords = 0;
+ int sort = SORT_NONE;
+ long long count = 0;
+ if (c->argc > base_args) {
+ int remaining = c->argc - base_args;
+ for (int i = 0; i < remaining; i++) {
+ char *arg = c->argv[base_args + i]->ptr;
+ if (!strcasecmp(arg, "withdist")) {
+ withdist = 1;
+ } else if (!strcasecmp(arg, "withhash")) {
+ withhash = 1;
+ } else if (!strcasecmp(arg, "withcoord")) {
+ withcoords = 1;
+ } else if (!strcasecmp(arg, "asc")) {
+ sort = SORT_ASC;
+ } else if (!strcasecmp(arg, "desc")) {
+ sort = SORT_DESC;
+ } else if (!strcasecmp(arg, "count") && remaining > 0) {
+ if (getLongLongFromObjectOrReply(c, c->argv[base_args+i+1],
+ &count, NULL) != REDIS_OK) return;
+ if (count <= 0) {
+ addReplyError(c,"COUNT must be > 0");
+ return;
+ }
+ i++;
+ } else {
+ addReply(c, shared.syntaxerr);
+ return;
+ }
+ }
+ }
+
+ /* COUNT without ordering does not make much sense, force ASC
+ * ordering if COUNT was specified but no sorting was requested. */
+ if (count != 0 && sort == SORT_NONE) sort = SORT_ASC;
+
+ /* Get all neighbor geohash boxes for our radius search */
+ GeoHashRadius georadius =
+ geohashGetAreasByRadiusWGS84(xy[0], xy[1], radius_meters);
+
+ /* Search the zset for all matching points */
+ geoArray *ga = geoArrayCreate();
+ membersOfAllNeighbors(zobj, georadius, xy[0], xy[1], radius_meters, ga);
+
+ /* If no matching results, the user gets an empty reply. */
+ if (ga->used == 0) {
+ addReply(c, shared.emptymultibulk);
+ geoArrayFree(ga);
+ return;
+ }
+
+ long result_length = ga->used;
+ long option_length = 0;
+
+ /* Our options are self-contained nested multibulk replies, so we
+ * only need to track how many of those nested replies we return. */
+ if (withdist)
+ option_length++;
+
+ if (withcoords)
+ option_length++;
+
+ if (withhash)
+ option_length++;
+
+ /* The multibulk len we send is exactly result_length. The result is either
+ * all strings of just zset members *or* a nested multi-bulk reply
+ * containing the zset member string _and_ all the additional options the
+ * user enabled for this request. */
+ addReplyMultiBulkLen(c, (count == 0 || result_length < count) ?
+ result_length : count);
+
+ /* Process [optional] requested sorting */
+ if (sort == SORT_ASC) {
+ qsort(ga->array, result_length, sizeof(geoPoint), sort_gp_asc);
+ } else if (sort == SORT_DESC) {
+ qsort(ga->array, result_length, sizeof(geoPoint), sort_gp_desc);
+ }
+
+ /* Finally send results back to the caller */
+ int i;
+ for (i = 0; i < result_length; i++) {
+ geoPoint *gp = ga->array+i;
+ gp->dist /= conversion; /* Fix according to unit. */
+
+ /* If we have options in option_length, return each sub-result
+ * as a nested multi-bulk. Add 1 to account for result value itself. */
+ if (option_length)
+ addReplyMultiBulkLen(c, option_length + 1);
+
+ addReplyBulkSds(c,gp->member);
+ gp->member = NULL;
+
+ if (withdist)
+ addReplyDoubleDistance(c, gp->dist);
+
+ if (withhash)
+ addReplyLongLong(c, gp->score);
+
+ if (withcoords) {
+ addReplyMultiBulkLen(c, 2);
+ addReplyDouble(c, gp->longitude);
+ addReplyDouble(c, gp->latitude);
+ }
+
+ /* Stop if COUNT was specified and we already provided the
+ * specified number of elements. */
+ if (count != 0 && count == i+1) break;
+ }
+ geoArrayFree(ga);
+}
+
+/* GEORADIUS wrapper function. */
+void georadiusCommand(redisClient *c) {
+ georadiusGeneric(c, RADIUS_COORDS);
+}
+
+/* GEORADIUSBYMEMBER wrapper function. */
+void georadiusByMemberCommand(redisClient *c) {
+ georadiusGeneric(c, RADIUS_MEMBER);
+}
+
+/* GEOHASH key ele1 ele2 ... eleN
+ *
+ * Returns an array with an 11 characters geohash representation of the
+ * position of the specified elements. */
+void geohashCommand(redisClient *c) {
+ char *geoalphabet= "0123456789bcdefghjkmnpqrstuvwxyz";
+ int j;
+
+ /* Look up the requested zset */
+ robj *zobj = NULL;
+ if ((zobj = lookupKeyReadOrReply(c, c->argv[1], shared.emptymultibulk))
+ == NULL || checkType(c, zobj, REDIS_ZSET)) return;
+
+ /* Geohash elements one after the other, using a null bulk reply for
+ * missing elements. */
+ addReplyMultiBulkLen(c,c->argc-2);
+ for (j = 2; j < c->argc; j++) {
+ double score;
+ if (zsetScore(zobj, c->argv[j], &score) == REDIS_ERR) {
+ addReply(c,shared.nullbulk);
+ } else {
+ /* The internal format we use for geocoding is a bit different
+ * than the standard, since we use as initial latitude range
+ * -85,85, while the normal geohashing algorithm uses -90,90.
+ * So we have to decode our position and re-encode using the
+ * standard ranges in order to output a valid geohash string. */
+
+ /* Decode... */
+ double xy[2];
+ if (!decodeGeohash(score,xy)) {
+ addReply(c,shared.nullbulk);
+ continue;
+ }
+
+ /* Re-encode */
+ GeoHashRange r[2];
+ GeoHashBits hash;
+ r[0].min = -180;
+ r[0].max = 180;
+ r[1].min = -90;
+ r[1].max = 90;
+ geohashEncode(&r[0],&r[1],xy[0],xy[1],26,&hash);
+
+ char buf[12];
+ int i;
+ for (i = 0; i < 11; i++) {
+ int idx = (hash.bits >> (52-((i+1)*5))) & 0x1f;
+ buf[i] = geoalphabet[idx];
+ }
+ buf[11] = '\0';
+ addReplyBulkCBuffer(c,buf,11);
+ }
+ }
+}
+
+/* GEOPOS key ele1 ele2 ... eleN
+ *
+ * Returns an array of two-items arrays representing the x,y position of each
+ * element specified in the arguments. For missing elements NULL is returned. */
+void geoposCommand(redisClient *c) {
+ int j;
+
+ /* Look up the requested zset */
+ robj *zobj = NULL;
+ if ((zobj = lookupKeyReadOrReply(c, c->argv[1], shared.emptymultibulk))
+ == NULL || checkType(c, zobj, REDIS_ZSET)) return;
+
+ /* Report elements one after the other, using a null bulk reply for
+ * missing elements. */
+ addReplyMultiBulkLen(c,c->argc-2);
+ for (j = 2; j < c->argc; j++) {
+ double score;
+ if (zsetScore(zobj, c->argv[j], &score) == REDIS_ERR) {
+ addReply(c,shared.nullmultibulk);
+ } else {
+ /* Decode... */
+ double xy[2];
+ if (!decodeGeohash(score,xy)) {
+ addReply(c,shared.nullmultibulk);
+ continue;
+ }
+ addReplyMultiBulkLen(c,2);
+ addReplyDouble(c,xy[0]);
+ addReplyDouble(c,xy[1]);
+ }
+ }
+}
+
+/* GEODIST key ele1 ele2 [unit]
+ *
+ * Return the distance, in meters by default, otherwise accordig to "unit",
+ * between points ele1 and ele2. If one or more elements are missing NULL
+ * is returned. */
+void geodistCommand(redisClient *c) {
+ double to_meter = 1;
+
+ /* Check if there is the unit to extract, otherwise assume meters. */
+ if (c->argc == 5) {
+ to_meter = extractUnitOrReply(c,c->argv[4]);
+ if (to_meter < 0) return;
+ } else if (c->argc > 5) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+
+ /* Look up the requested zset */
+ robj *zobj = NULL;
+ if ((zobj = lookupKeyReadOrReply(c, c->argv[1], shared.emptybulk))
+ == NULL || checkType(c, zobj, REDIS_ZSET)) return;
+
+ /* Get the scores. We need both otherwise NULL is returned. */
+ double score1, score2, xyxy[4];
+ if (zsetScore(zobj, c->argv[2], &score1) == REDIS_ERR ||
+ zsetScore(zobj, c->argv[3], &score2) == REDIS_ERR)
+ {
+ addReply(c,shared.nullbulk);
+ return;
+ }
+
+ /* Decode & compute the distance. */
+ if (!decodeGeohash(score1,xyxy) || !decodeGeohash(score2,xyxy+2))
+ addReply(c,shared.nullbulk);
+ else
+ addReplyDouble(c,
+ geohashGetDistance(xyxy[0],xyxy[1],xyxy[2],xyxy[3]) / to_meter);
+}
diff --git a/src/geo.h b/src/geo.h
new file mode 100644
index 000000000..cf4e42c90
--- /dev/null
+++ b/src/geo.h
@@ -0,0 +1,22 @@
+#ifndef __GEO_H__
+#define __GEO_H__
+
+#include "redis.h"
+
+/* Structures used inside geo.c in order to represent points and array of
+ * points on the earth. */
+typedef struct geoPoint {
+ double longitude;
+ double latitude;
+ double dist;
+ double score;
+ char *member;
+} geoPoint;
+
+typedef struct geoArray {
+ struct geoPoint *array;
+ size_t buckets;
+ size_t used;
+} geoArray;
+
+#endif
diff --git a/src/latency.c b/src/latency.c
index fd76b3215..54ed03778 100644
--- a/src/latency.c
+++ b/src/latency.c
@@ -248,7 +248,7 @@ sds createLatencyReport(void) {
dictEntry *de;
int eventnum = 0;
- di = dictGetIterator(server.latency_events);
+ di = dictGetSafeIterator(server.latency_events);
while((de = dictNext(di)) != NULL) {
char *event = dictGetKey(de);
struct latencyTimeSeries *ts = dictGetVal(de);
diff --git a/src/networking.c b/src/networking.c
index b1d4042dc..eb033ae61 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -135,23 +135,49 @@ redisClient *createClient(int fd) {
* returns REDIS_OK, and make sure to install the write handler in our event
* loop so that when the socket is writable new data gets written.
*
- * If the client should not receive new data, because it is a fake client,
- * a master, a slave not yet online, or because the setup of the write handler
- * failed, the function returns REDIS_ERR.
+ * If the client should not receive new data, because it is a fake client
+ * (used to load AOF in memory), a master or because the setup of the write
+ * handler failed, the function returns REDIS_ERR.
+ *
+ * The function may return REDIS_OK without actually installing the write
+ * event handler in the following cases:
+ *
+ * 1) The event handler should already be installed since the output buffer
+ * already contained something.
+ * 2) The client is a slave but not yet online, so we want to just accumulate
+ * writes in the buffer but not actually sending them yet.
*
* Typically gets called every time a reply is built, before adding more
* data to the clients output buffers. If the function returns REDIS_ERR no
* data should be appended to the output buffers. */
int prepareClientToWrite(redisClient *c) {
+ /* If it's the Lua client we always return ok without installing any
+ * handler since there is no socket at all. */
if (c->flags & REDIS_LUA_CLIENT) return REDIS_OK;
+
+ /* Masters don't receive replies, unless REDIS_MASTER_FORCE_REPLY flag
+ * is set. */
if ((c->flags & REDIS_MASTER) &&
!(c->flags & REDIS_MASTER_FORCE_REPLY)) return REDIS_ERR;
- if (c->fd <= 0) return REDIS_ERR; /* Fake client */
+
+ if (c->fd <= 0) return REDIS_ERR; /* Fake client for AOF loading. */
+
+ /* Only install the handler if not already installed and, in case of
+ * slaves, if the client can actually receive writes. */
if (c->bufpos == 0 && listLength(c->reply) == 0 &&
(c->replstate == REDIS_REPL_NONE ||
- c->replstate == REDIS_REPL_ONLINE) && !c->repl_put_online_on_ack &&
- aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
- sendReplyToClient, c) == AE_ERR) return REDIS_ERR;
+ (c->replstate == REDIS_REPL_ONLINE && !c->repl_put_online_on_ack)))
+ {
+ /* Try to install the write handler. */
+ if (aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
+ sendReplyToClient, c) == AE_ERR)
+ {
+ freeClientAsync(c);
+ return REDIS_ERR;
+ }
+ }
+
+ /* Authorize the caller to queue in the output buffer of this client. */
return REDIS_OK;
}
@@ -175,7 +201,7 @@ robj *dupLastObjectIfNeeded(list *reply) {
* Low level functions to add more data to output buffers.
* -------------------------------------------------------------------------- */
-int _addReplyToBuffer(redisClient *c, char *s, size_t len) {
+int _addReplyToBuffer(redisClient *c, const char *s, size_t len) {
size_t available = sizeof(c->buf)-c->bufpos;
if (c->flags & REDIS_CLOSE_AFTER_REPLY) return REDIS_OK;
@@ -255,7 +281,7 @@ void _addReplySdsToList(redisClient *c, sds s) {
asyncCloseClientOnOutputBufferLimitReached(c);
}
-void _addReplyStringToList(redisClient *c, char *s, size_t len) {
+void _addReplyStringToList(redisClient *c, const char *s, size_t len) {
robj *tail;
if (c->flags & REDIS_CLOSE_AFTER_REPLY) return;
@@ -341,19 +367,19 @@ void addReplySds(redisClient *c, sds s) {
}
}
-void addReplyString(redisClient *c, char *s, size_t len) {
+void addReplyString(redisClient *c, const char *s, size_t len) {
if (prepareClientToWrite(c) != REDIS_OK) return;
if (_addReplyToBuffer(c,s,len) != REDIS_OK)
_addReplyStringToList(c,s,len);
}
-void addReplyErrorLength(redisClient *c, char *s, size_t len) {
+void addReplyErrorLength(redisClient *c, const char *s, size_t len) {
addReplyString(c,"-ERR ",5);
addReplyString(c,s,len);
addReplyString(c,"\r\n",2);
}
-void addReplyError(redisClient *c, char *err) {
+void addReplyError(redisClient *c, const char *err) {
addReplyErrorLength(c,err,strlen(err));
}
@@ -373,13 +399,13 @@ void addReplyErrorFormat(redisClient *c, const char *fmt, ...) {
sdsfree(s);
}
-void addReplyStatusLength(redisClient *c, char *s, size_t len) {
+void addReplyStatusLength(redisClient *c, const char *s, size_t len) {
addReplyString(c,"+",1);
addReplyString(c,s,len);
addReplyString(c,"\r\n",2);
}
-void addReplyStatus(redisClient *c, char *status) {
+void addReplyStatus(redisClient *c, const char *status) {
addReplyStatusLength(c,status,strlen(status));
}
@@ -454,10 +480,10 @@ void addReplyLongLongWithPrefix(redisClient *c, long long ll, char prefix) {
/* Things like $3\r\n or *2\r\n are emitted very often by the protocol
* so we have a few shared objects to use if the integer is small
* like it is most of the times. */
- if (prefix == '*' && ll < REDIS_SHARED_BULKHDR_LEN) {
+ if (prefix == '*' && ll < REDIS_SHARED_BULKHDR_LEN && ll >= 0) {
addReply(c,shared.mbulkhdr[ll]);
return;
- } else if (prefix == '$' && ll < REDIS_SHARED_BULKHDR_LEN) {
+ } else if (prefix == '$' && ll < REDIS_SHARED_BULKHDR_LEN && ll >= 0) {
addReply(c,shared.bulkhdr[ll]);
return;
}
@@ -519,7 +545,7 @@ void addReplyBulk(redisClient *c, robj *obj) {
}
/* Add a C buffer as bulk reply */
-void addReplyBulkCBuffer(redisClient *c, void *p, size_t len) {
+void addReplyBulkCBuffer(redisClient *c, const void *p, size_t len) {
addReplyLongLongWithPrefix(c,len,'$');
addReplyString(c,p,len);
addReply(c,shared.crlf);
@@ -534,7 +560,7 @@ void addReplyBulkSds(redisClient *c, sds s) {
}
/* Add a C nul term string as bulk reply */
-void addReplyBulkCString(redisClient *c, char *s) {
+void addReplyBulkCString(redisClient *c, const char *s) {
if (s == NULL) {
addReply(c,shared.nullbulk);
} else {
@@ -779,7 +805,7 @@ void freeClient(redisClient *c) {
* a context where calling freeClient() is not possible, because the client
* should be valid for the continuation of the flow of the program. */
void freeClientAsync(redisClient *c) {
- if (c->flags & REDIS_CLOSE_ASAP) return;
+ if (c->flags & REDIS_CLOSE_ASAP || c->flags & REDIS_LUA_CLIENT) return;
c->flags |= REDIS_CLOSE_ASAP;
listAddNodeTail(server.clients_to_close,c);
}
@@ -957,7 +983,7 @@ int processInlineBuffer(redisClient *c) {
/* Helper function. Trims query buffer to make the function that processes
* multi bulk requests idempotent. */
static void setProtocolError(redisClient *c, int pos) {
- if (server.verbosity >= REDIS_VERBOSE) {
+ if (server.verbosity <= REDIS_VERBOSE) {
sds client = catClientInfoString(sdsempty(),c);
redisLog(REDIS_VERBOSE,
"Protocol error from client: %s", client);
@@ -1501,6 +1527,16 @@ void rewriteClientCommandVector(redisClient *c, int argc, ...) {
va_end(ap);
}
+/* Completely replace the client command vector with the provided one. */
+void replaceClientCommandVector(redisClient *c, int argc, robj **argv) {
+ freeClientArgv(c);
+ zfree(c->argv);
+ c->argv = argv;
+ c->argc = argc;
+ c->cmd = lookupCommandOrOriginal(c->argv[0]->ptr);
+ redisAssertWithInfo(c,NULL,c->cmd != NULL);
+}
+
/* Rewrite a single item in the command vector.
* The new val ref count is incremented, and the old decremented. */
void rewriteClientCommandArgument(redisClient *c, int i, robj *newval) {
@@ -1676,7 +1712,9 @@ void pauseClients(mstime_t end) {
/* Return non-zero if clients are currently paused. As a side effect the
* function checks if the pause time was reached and clear it. */
int clientsArePaused(void) {
- if (server.clients_paused && server.clients_pause_end_time < server.mstime) {
+ if (server.clients_paused &&
+ server.clients_pause_end_time < server.mstime)
+ {
listNode *ln;
listIter li;
redisClient *c;
@@ -1689,7 +1727,10 @@ int clientsArePaused(void) {
while ((ln = listNext(&li)) != NULL) {
c = listNodeValue(ln);
- if (c->flags & REDIS_SLAVE) continue;
+ /* Don't touch slaves and blocked clients. The latter pending
+ * requests be processed when unblocked. */
+ if (c->flags & (REDIS_SLAVE|REDIS_BLOCKED)) continue;
+ c->flags |= REDIS_UNBLOCKED;
listAddNodeTail(server.unblocked_clients,c);
}
}
diff --git a/src/object.c b/src/object.c
index 8905db18d..dcd896917 100644
--- a/src/object.c
+++ b/src/object.c
@@ -50,14 +50,14 @@ robj *createObject(int type, void *ptr) {
/* Create a string object with encoding REDIS_ENCODING_RAW, that is a plain
* string object where o->ptr points to a proper sds string. */
-robj *createRawStringObject(char *ptr, size_t len) {
+robj *createRawStringObject(const char *ptr, size_t len) {
return createObject(REDIS_STRING,sdsnewlen(ptr,len));
}
/* Create a string object with encoding REDIS_ENCODING_EMBSTR, that is
* an object where the sds string is actually an unmodifiable string
* allocated in the same chunk as the object itself. */
-robj *createEmbeddedStringObject(char *ptr, size_t len) {
+robj *createEmbeddedStringObject(const char *ptr, size_t len) {
robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr)+len+1);
struct sdshdr *sh = (void*)(o+1);
@@ -85,7 +85,7 @@ robj *createEmbeddedStringObject(char *ptr, size_t len) {
* The current limit of 39 is chosen so that the biggest string object
* we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
#define REDIS_ENCODING_EMBSTR_SIZE_LIMIT 39
-robj *createStringObject(char *ptr, size_t len) {
+robj *createStringObject(const char *ptr, size_t len) {
if (len <= REDIS_ENCODING_EMBSTR_SIZE_LIMIT)
return createEmbeddedStringObject(ptr,len);
else
diff --git a/src/rdb.c b/src/rdb.c
index aa2e6f5c4..2d9853f3f 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -869,9 +869,9 @@ int rdbSave(char *filename) {
return REDIS_OK;
werr:
+ redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
fclose(fp);
unlink(tmpfile);
- redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
return REDIS_ERR;
}
diff --git a/src/redis-cli.c b/src/redis-cli.c
index 251e42fad..acf7c98b9 100644
--- a/src/redis-cli.c
+++ b/src/redis-cli.c
@@ -644,9 +644,9 @@ static int cliSendCommand(int argc, char **argv, int repeat) {
output_raw = 0;
if (!strcasecmp(command,"info") ||
- (argc == 3 && !strcasecmp(command,"debug") &&
- (!strcasecmp(argv[1],"jemalloc") &&
- !strcasecmp(argv[2],"info"))) ||
+ (argc >= 2 && !strcasecmp(command,"debug") &&
+ (!strcasecmp(argv[1],"jemalloc") ||
+ !strcasecmp(argv[1],"htstats"))) ||
(argc == 2 && !strcasecmp(command,"cluster") &&
(!strcasecmp(argv[1],"nodes") ||
!strcasecmp(argv[1],"info"))) ||
diff --git a/src/redis.c b/src/redis.c
index 805a6c2a3..f963042e9 100644
--- a/src/redis.c
+++ b/src/redis.c
@@ -53,7 +53,7 @@
#include <sys/resource.h>
#include <sys/utsname.h>
#include <locale.h>
-#include <sys/sysctl.h>
+#include <sys/socket.h>
/* Our shared "common" objects */
@@ -131,7 +131,7 @@ struct redisCommand redisCommandTable[] = {
{"append",appendCommand,3,"wm",0,NULL,1,1,1,0,0},
{"strlen",strlenCommand,2,"rF",0,NULL,1,1,1,0,0},
{"del",delCommand,-2,"w",0,NULL,1,-1,1,0,0},
- {"exists",existsCommand,2,"rF",0,NULL,1,1,1,0,0},
+ {"exists",existsCommand,-2,"rF",0,NULL,1,-1,1,0,0},
{"setbit",setbitCommand,4,"wm",0,NULL,1,1,1,0,0},
{"getbit",getbitCommand,3,"rF",0,NULL,1,1,1,0,0},
{"setrange",setrangeCommand,4,"wm",0,NULL,1,1,1,0,0},
@@ -281,9 +281,15 @@ struct redisCommand redisCommandTable[] = {
{"bitpos",bitposCommand,-3,"r",0,NULL,1,1,1,0,0},
{"wait",waitCommand,3,"rs",0,NULL,0,0,0,0,0},
{"command",commandCommand,0,"rlt",0,NULL,0,0,0,0,0},
+ {"geoadd",geoaddCommand,-5,"wm",0,NULL,1,1,1,0,0},
+ {"georadius",georadiusCommand,-6,"r",0,NULL,1,1,1,0,0},
+ {"georadiusbymember",georadiusByMemberCommand,-5,"r",0,NULL,1,1,1,0,0},
+ {"geohash",geohashCommand,-2,"r",0,NULL,1,1,1,0,0},
+ {"geopos",geoposCommand,-2,"r",0,NULL,1,1,1,0,0},
+ {"geodist",geodistCommand,-4,"r",0,NULL,1,1,1,0,0},
{"pfselftest",pfselftestCommand,1,"r",0,NULL,0,0,0,0,0},
{"pfadd",pfaddCommand,-2,"wmF",0,NULL,1,1,1,0,0},
- {"pfcount",pfcountCommand,-2,"r",0,NULL,1,1,1,0,0},
+ {"pfcount",pfcountCommand,-2,"r",0,NULL,1,-1,1,0,0},
{"pfmerge",pfmergeCommand,-2,"wm",0,NULL,1,-1,1,0,0},
{"pfdebug",pfdebugCommand,-3,"w",0,NULL,0,0,0,0,0},
{"latency",latencyCommand,-2,"arslt",0,NULL,0,0,0,0,0}
@@ -905,9 +911,12 @@ long long getInstantaneousMetric(int metric) {
return sum / REDIS_METRIC_SAMPLES;
}
-/* Check for timeouts. Returns non-zero if the client was terminated */
-int clientsCronHandleTimeout(redisClient *c) {
- time_t now = server.unixtime;
+/* Check for timeouts. Returns non-zero if the client was terminated.
+ * The function gets the current time in milliseconds as argument since
+ * it gets called multiple times in a loop, so calling gettimeofday() for
+ * each iteration would be costly without any actual gain. */
+int clientsCronHandleTimeout(redisClient *c, mstime_t now_ms) {
+ time_t now = now_ms/1000;
if (server.maxidletime &&
!(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
@@ -923,11 +932,16 @@ int clientsCronHandleTimeout(redisClient *c) {
/* Blocked OPS timeout is handled with milliseconds resolution.
* However note that the actual resolution is limited by
* server.hz. */
- mstime_t now_ms = mstime();
if (c->bpop.timeout != 0 && c->bpop.timeout < now_ms) {
+ /* Handle blocking operation specific timeout. */
replyToBlockedClientTimedOut(c);
unblockClient(c);
+ } else if (server.cluster_enabled) {
+ /* Cluster: handle unblock & redirect of clients blocked
+ * into keys no longer served by this server. */
+ if (clusterRedirectBlockedClientIfNeeded(c))
+ unblockClient(c);
}
}
return 0;
@@ -959,17 +973,23 @@ int clientsCronResizeQueryBuffer(redisClient *c) {
return 0;
}
+#define CLIENTS_CRON_MIN_ITERATIONS 5
void clientsCron(void) {
- /* Make sure to process at least 1/(server.hz*10) of clients per call.
- * Since this function is called server.hz times per second we are sure that
- * in the worst case we process all the clients in 10 seconds.
- * In normal conditions (a reasonable number of clients) we process
- * all the clients in a shorter time. */
+ /* Make sure to process at least numclients/server.hz of clients
+ * per call. Since this function is called server.hz times per second
+ * we are sure that in the worst case we process all the clients in 1
+ * second. */
int numclients = listLength(server.clients);
- int iterations = numclients/(server.hz*10);
+ int iterations = numclients/server.hz;
+ mstime_t now = mstime();
+
+ /* Process at least a few clients while we are at it, even if we need
+ * to process less than CLIENTS_CRON_MIN_ITERATIONS to meet our contract
+ * of processing each client once per second. */
+ if (iterations < CLIENTS_CRON_MIN_ITERATIONS)
+ iterations = (numclients < CLIENTS_CRON_MIN_ITERATIONS) ?
+ numclients : CLIENTS_CRON_MIN_ITERATIONS;
- if (iterations < 50)
- iterations = (numclients < 50) ? numclients : 50;
while(listLength(server.clients) && iterations--) {
redisClient *c;
listNode *head;
@@ -983,7 +1003,7 @@ void clientsCron(void) {
/* The following functions do different service checks on the client.
* The protocol is that they return non-zero if the client was
* terminated. */
- if (clientsCronHandleTimeout(c)) continue;
+ if (clientsCronHandleTimeout(c,now)) continue;
if (clientsCronResizeQueryBuffer(c)) continue;
}
}
@@ -1260,6 +1280,12 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
void beforeSleep(struct aeEventLoop *eventLoop) {
REDIS_NOTUSED(eventLoop);
+ /* Call the Redis Cluster before sleep function. Note that this function
+ * may change the state of Redis Cluster (from ok to fail or vice versa),
+ * so it's a good idea to call it before serving the unblocked clients
+ * later in this function. */
+ if (server.cluster_enabled) clusterBeforeSleep();
+
/* Run a fast expire cycle (the called function will return
* ASAP if a fast cycle is not needed). */
if (server.active_expire_enabled && server.masterhost == NULL)
@@ -1291,9 +1317,6 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
/* Write the AOF buffer on disk */
flushAppendOnlyFile(0);
-
- /* Call the Redis Cluster before sleep function. */
- if (server.cluster_enabled) clusterBeforeSleep();
}
/* =========================== Server initialization ======================== */
@@ -1740,6 +1763,7 @@ void resetServerStats(void) {
}
server.stat_net_input_bytes = 0;
server.stat_net_output_bytes = 0;
+ server.aof_delayed_fsync = 0;
}
void initServer(void) {
@@ -1784,7 +1808,7 @@ void initServer(void) {
server.sofd = anetUnixServer(server.neterr,server.unixsocket,
server.unixsocketperm, server.tcp_backlog);
if (server.sofd == ANET_ERR) {
- redisLog(REDIS_WARNING, "Opening socket: %s", server.neterr);
+ redisLog(REDIS_WARNING, "Opening Unix socket: %s", server.neterr);
exit(1);
}
anetNonBlock(NULL,server.sofd);
@@ -2199,36 +2223,22 @@ int processCommand(redisClient *c) {
* 2) The command has no key arguments. */
if (server.cluster_enabled &&
!(c->flags & REDIS_MASTER) &&
+ !(c->flags & REDIS_LUA_CLIENT &&
+ server.lua_caller->flags & REDIS_MASTER) &&
!(c->cmd->getkeys_proc == NULL && c->cmd->firstkey == 0))
{
int hashslot;
if (server.cluster->state != REDIS_CLUSTER_OK) {
flagTransaction(c);
- addReplySds(c,sdsnew("-CLUSTERDOWN The cluster is down. Use CLUSTER INFO for more information\r\n"));
+ clusterRedirectClient(c,NULL,0,REDIS_CLUSTER_REDIR_DOWN_STATE);
return REDIS_OK;
} else {
int error_code;
clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc,&hashslot,&error_code);
- if (n == NULL) {
- flagTransaction(c);
- if (error_code == REDIS_CLUSTER_REDIR_CROSS_SLOT) {
- addReplySds(c,sdsnew("-CROSSSLOT Keys in request don't hash to the same slot\r\n"));
- } else if (error_code == REDIS_CLUSTER_REDIR_UNSTABLE) {
- /* The request spawns mutliple keys in the same slot,
- * but the slot is not "stable" currently as there is
- * a migration or import in progress. */
- addReplySds(c,sdsnew("-TRYAGAIN Multiple keys request during rehashing of slot\r\n"));
- } else {
- redisPanic("getNodeByQuery() unknown error.");
- }
- return REDIS_OK;
- } else if (n != server.cluster->myself) {
+ if (n == NULL || n != server.cluster->myself) {
flagTransaction(c);
- addReplySds(c,sdscatprintf(sdsempty(),
- "-%s %d %s:%d\r\n",
- (error_code == REDIS_CLUSTER_REDIR_ASK) ? "ASK" : "MOVED",
- hashslot,n->ip,n->port));
+ clusterRedirectClient(c,n,hashslot,error_code);
return REDIS_OK;
}
}
@@ -2745,7 +2755,7 @@ sds genRedisInfoString(char *section) {
char maxmemory_hmem[64];
size_t zmalloc_used = zmalloc_used_memory();
size_t total_system_mem = server.system_memory_size;
- char *evict_policy = maxmemoryToString();
+ const char *evict_policy = maxmemoryToString();
long long memory_lua = (long long)lua_gc(server.lua,LUA_GCCOUNT,0)*1024;
/* Peak memory is updated from time to time by serverCron() so it
diff --git a/src/redis.h b/src/redis.h
index 5ea7ae21a..09ac289cf 100644
--- a/src/redis.h
+++ b/src/redis.h
@@ -1053,14 +1053,14 @@ void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask);
void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask);
void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask);
void addReplyBulk(redisClient *c, robj *obj);
-void addReplyBulkCString(redisClient *c, char *s);
-void addReplyBulkCBuffer(redisClient *c, void *p, size_t len);
+void addReplyBulkCString(redisClient *c, const char *s);
+void addReplyBulkCBuffer(redisClient *c, const void *p, size_t len);
void addReplyBulkLongLong(redisClient *c, long long ll);
void addReply(redisClient *c, robj *obj);
void addReplySds(redisClient *c, sds s);
void addReplyBulkSds(redisClient *c, sds s);
-void addReplyError(redisClient *c, char *err);
-void addReplyStatus(redisClient *c, char *status);
+void addReplyError(redisClient *c, const char *err);
+void addReplyStatus(redisClient *c, const char *status);
void addReplyDouble(redisClient *c, double d);
void addReplyLongLong(redisClient *c, long long ll);
void addReplyMultiBulkLen(redisClient *c, long length);
@@ -1074,6 +1074,7 @@ sds catClientInfoString(sds s, redisClient *client);
sds getAllClientsInfoString(void);
void rewriteClientCommandVector(redisClient *c, int argc, ...);
void rewriteClientCommandArgument(redisClient *c, int i, robj *newval);
+void replaceClientCommandVector(redisClient *c, int argc, robj **argv);
unsigned long getClientOutputBufferMemoryUsage(redisClient *c);
void freeClientsInAsyncFreeQueue(void);
void asyncCloseClientOnOutputBufferLimitReached(redisClient *c);
@@ -1136,9 +1137,9 @@ void freeSetObject(robj *o);
void freeZsetObject(robj *o);
void freeHashObject(robj *o);
robj *createObject(int type, void *ptr);
-robj *createStringObject(char *ptr, size_t len);
-robj *createRawStringObject(char *ptr, size_t len);
-robj *createEmbeddedStringObject(char *ptr, size_t len);
+robj *createStringObject(const char *ptr, size_t len);
+robj *createRawStringObject(const char *ptr, size_t len);
+robj *createEmbeddedStringObject(const char *ptr, size_t len);
robj *dupStringObject(robj *o);
int isObjectRepresentableAsLongLong(robj *o, long long *llongval);
robj *tryObjectEncoding(robj *o);
@@ -1241,6 +1242,7 @@ void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
unsigned int zsetLength(robj *zobj);
void zsetConvert(robj *zobj, int encoding);
+int zsetScore(robj *zobj, robj *member, double *score);
unsigned long zslGetRank(zskiplist *zsl, double score, robj *o);
/* Core functions */
@@ -1275,7 +1277,7 @@ void closeListeningSockets(int unlink_unix_socket);
void updateCachedTime(void);
void resetServerStats(void);
unsigned int getLRUClock(void);
-char *maxmemoryToString(void);
+const char *maxmemoryToString(void);
/* Set data type */
robj *setTypeCreate(robj *value);
@@ -1328,7 +1330,7 @@ void loadServerConfig(char *filename, char *options);
void appendServerSaveParams(time_t seconds, int changes);
void resetServerSaveParams(void);
struct rewriteConfigState; /* Forward declaration to export API. */
-void rewriteConfigRewriteLine(struct rewriteConfigState *state, char *option, sds line, int force);
+void rewriteConfigRewriteLine(struct rewriteConfigState *state, const char *option, sds line, int force);
int rewriteConfig(char *path);
/* db.c -- Keyspace access API */
@@ -1396,6 +1398,7 @@ void blockClient(redisClient *c, int btype);
void unblockClient(redisClient *c);
void replyToBlockedClientTimedOut(redisClient *c);
int getTimeoutFromObjectOrReply(redisClient *c, robj *object, mstime_t *timeout, int unit);
+void disconnectAllBlockedClients(void);
/* Git SHA1 */
char *redisGitSHA1(void);
@@ -1556,6 +1559,14 @@ void bitcountCommand(redisClient *c);
void bitposCommand(redisClient *c);
void replconfCommand(redisClient *c);
void waitCommand(redisClient *c);
+void geoencodeCommand(redisClient *c);
+void geodecodeCommand(redisClient *c);
+void georadiusByMemberCommand(redisClient *c);
+void georadiusCommand(redisClient *c);
+void geoaddCommand(redisClient *c);
+void geohashCommand(redisClient *c);
+void geoposCommand(redisClient *c);
+void geodistCommand(redisClient *c);
void pfselftestCommand(redisClient *c);
void pfaddCommand(redisClient *c);
void pfcountCommand(redisClient *c);
diff --git a/src/replication.c b/src/replication.c
index aca198b88..7cdfdb165 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -652,7 +652,8 @@ void replconfCommand(redisClient *c) {
*
* It does a few things:
*
- * 1) Put the slave in ONLINE state.
+ * 1) Put the slave in ONLINE state (useless when the function is called
+ * because state is already ONLINE but repl_put_online_on_ack is true).
* 2) Make sure the writable event is re-installed, since calling the SYNC
* command disables it, so that we can accumulate output buffer without
* sending it to the slave.
@@ -660,7 +661,7 @@ void replconfCommand(redisClient *c) {
void putSlaveOnline(redisClient *slave) {
slave->replstate = REDIS_REPL_ONLINE;
slave->repl_put_online_on_ack = 0;
- slave->repl_ack_time = server.unixtime;
+ slave->repl_ack_time = server.unixtime; /* Prevent false timeout. */
if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
sendReplyToClient, slave) == AE_ERR) {
redisLog(REDIS_WARNING,"Unable to register writable event for slave bulk transfer: %s", strerror(errno));
@@ -773,7 +774,7 @@ void updateSlavesWaitingBgsave(int bgsaveerr, int type) {
* is technically online now. */
slave->replstate = REDIS_REPL_ONLINE;
slave->repl_put_online_on_ack = 1;
- slave->repl_ack_time = server.unixtime;
+ slave->repl_ack_time = server.unixtime; /* Timeout otherwise. */
} else {
if (bgsaveerr != REDIS_OK) {
freeClient(slave);
@@ -1443,7 +1444,7 @@ error:
int connectWithMaster(void) {
int fd;
- fd = anetTcpNonBlockBindConnect(NULL,
+ fd = anetTcpNonBlockBestEffortBindConnect(NULL,
server.masterhost,server.masterport,REDIS_BIND_ADDR);
if (fd == -1) {
redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
@@ -1505,6 +1506,7 @@ void replicationSetMaster(char *ip, int port) {
server.masterhost = sdsnew(ip);
server.masterport = port;
if (server.master) freeClient(server.master);
+ disconnectAllBlockedClients(); /* Clients blocked in master, now slave. */
disconnectSlaves(); /* Force our slaves to resync with us as well. */
replicationDiscardCachedMaster(); /* Don't try a PSYNC. */
freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */
diff --git a/src/scripting.c b/src/scripting.c
index c5dd4e718..2819c05fb 100644
--- a/src/scripting.c
+++ b/src/scripting.c
@@ -357,8 +357,9 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
if (cmd->flags & REDIS_CMD_WRITE) server.lua_write_dirty = 1;
/* If this is a Redis Cluster node, we need to make sure Lua is not
- * trying to access non-local keys. */
- if (server.cluster_enabled) {
+ * trying to access non-local keys, with the exception of commands
+ * received from our master. */
+ if (server.cluster_enabled && !(server.lua_caller->flags & REDIS_MASTER)) {
/* Duplicate relevant flags in the lua client. */
c->flags &= ~(REDIS_READONLY|REDIS_ASKING);
c->flags |= server.lua_caller->flags & (REDIS_READONLY|REDIS_ASKING);
@@ -611,11 +612,12 @@ void scriptingEnableGlobalsProtection(lua_State *lua) {
/* strict.lua from: http://metalua.luaforge.net/src/lib/strict.lua.html.
* Modified to be adapted to Redis. */
+ s[j++]="local dbg=debug\n";
s[j++]="local mt = {}\n";
s[j++]="setmetatable(_G, mt)\n";
s[j++]="mt.__newindex = function (t, n, v)\n";
- s[j++]=" if debug.getinfo(2) then\n";
- s[j++]=" local w = debug.getinfo(2, \"S\").what\n";
+ s[j++]=" if dbg.getinfo(2) then\n";
+ s[j++]=" local w = dbg.getinfo(2, \"S\").what\n";
s[j++]=" if w ~= \"main\" and w ~= \"C\" then\n";
s[j++]=" error(\"Script attempted to create global variable '\"..tostring(n)..\"'\", 2)\n";
s[j++]=" end\n";
@@ -623,11 +625,12 @@ void scriptingEnableGlobalsProtection(lua_State *lua) {
s[j++]=" rawset(t, n, v)\n";
s[j++]="end\n";
s[j++]="mt.__index = function (t, n)\n";
- s[j++]=" if debug.getinfo(2) and debug.getinfo(2, \"S\").what ~= \"C\" then\n";
+ s[j++]=" if dbg.getinfo(2) and dbg.getinfo(2, \"S\").what ~= \"C\" then\n";
s[j++]=" error(\"Script attempted to access unexisting global variable '\"..tostring(n)..\"'\", 2)\n";
s[j++]=" end\n";
s[j++]=" return rawget(t, n)\n";
s[j++]="end\n";
+ s[j++]="debug = nil\n";
s[j++]=NULL;
for (j = 0; s[j] != NULL; j++) code = sdscatlen(code,s[j],strlen(s[j]));
@@ -731,10 +734,11 @@ void scriptingInit(void) {
* information about the caller, that's what makes sense from the point
* of view of the user debugging a script. */
{
- char *errh_func = "function __redis__err__handler(err)\n"
- " local i = debug.getinfo(2,'nSl')\n"
+ char *errh_func = "local dbg = debug\n"
+ "function __redis__err__handler(err)\n"
+ " local i = dbg.getinfo(2,'nSl')\n"
" if i and i.what == 'C' then\n"
- " i = debug.getinfo(3,'nSl')\n"
+ " i = dbg.getinfo(3,'nSl')\n"
" end\n"
" if i then\n"
" return i.source .. ':' .. i.currentline .. ': ' .. err\n"
@@ -876,7 +880,7 @@ void luaSetGlobalArray(lua_State *lua, char *var, robj **elev, int elec) {
}
/* Define a lua function with the specified function name and body.
- * The function name musts be a 2 characters long string, since all the
+ * The function name musts be a 42 characters long string, since all the
* functions we defined in the Lua context are in the form:
*
* f_<hex sha1 sum>
diff --git a/src/sds.c b/src/sds.c
index 05ee0ad56..2ebe286d1 100644
--- a/src/sds.c
+++ b/src/sds.c
@@ -71,7 +71,7 @@ sds sdsempty(void) {
return sdsnewlen("",0);
}
-/* Create a new sds string starting from a null termined C string. */
+/* Create a new sds string starting from a null terminated C string. */
sds sdsnew(const char *init) {
size_t initlen = (init == NULL) ? 0 : strlen(init);
return sdsnewlen(init, initlen);
@@ -557,7 +557,7 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
* Example:
*
* s = sdsnew("AA...AA.a.aa.aHelloWorld :::");
- * s = sdstrim(s,"A. :");
+ * s = sdstrim(s,"Aa. :");
* printf("%s\n", s);
*
* Output will be just "Hello World".
@@ -1098,6 +1098,7 @@ int sdsTest(int argc, char *argv[]) {
unsigned int oldfree;
sdsfree(x);
+ sdsfree(y);
x = sdsnew("0");
sh = (void*) (x-(sizeof(struct sdshdr)));
test_cond("sdsnew() free/len buffers", sh->len == 1 && sh->free == 0);
@@ -1110,6 +1111,8 @@ int sdsTest(int argc, char *argv[]) {
test_cond("sdsIncrLen() -- content", x[0] == '0' && x[1] == '1');
test_cond("sdsIncrLen() -- len", sh->len == 2);
test_cond("sdsIncrLen() -- free", sh->free == oldfree-1);
+
+ sdsfree(x);
}
}
test_report()
diff --git a/src/sentinel.c b/src/sentinel.c
index c170818c9..3ff8899d7 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -54,19 +54,18 @@ typedef struct sentinelAddr {
#define SRI_MASTER (1<<0)
#define SRI_SLAVE (1<<1)
#define SRI_SENTINEL (1<<2)
-#define SRI_DISCONNECTED (1<<3)
-#define SRI_S_DOWN (1<<4) /* Subjectively down (no quorum). */
-#define SRI_O_DOWN (1<<5) /* Objectively down (confirmed by others). */
-#define SRI_MASTER_DOWN (1<<6) /* A Sentinel with this flag set thinks that
+#define SRI_S_DOWN (1<<3) /* Subjectively down (no quorum). */
+#define SRI_O_DOWN (1<<4) /* Objectively down (confirmed by others). */
+#define SRI_MASTER_DOWN (1<<5) /* A Sentinel with this flag set thinks that
its master is down. */
-#define SRI_FAILOVER_IN_PROGRESS (1<<7) /* Failover is in progress for
+#define SRI_FAILOVER_IN_PROGRESS (1<<6) /* Failover is in progress for
this master. */
-#define SRI_PROMOTED (1<<8) /* Slave selected for promotion. */
-#define SRI_RECONF_SENT (1<<9) /* SLAVEOF <newmaster> sent. */
-#define SRI_RECONF_INPROG (1<<10) /* Slave synchronization in progress. */
-#define SRI_RECONF_DONE (1<<11) /* Slave synchronized with new master. */
-#define SRI_FORCE_FAILOVER (1<<12) /* Force failover with master up. */
-#define SRI_SCRIPT_KILL_SENT (1<<13) /* SCRIPT KILL already sent on -BUSY */
+#define SRI_PROMOTED (1<<7) /* Slave selected for promotion. */
+#define SRI_RECONF_SENT (1<<8) /* SLAVEOF <newmaster> sent. */
+#define SRI_RECONF_INPROG (1<<9) /* Slave synchronization in progress. */
+#define SRI_RECONF_DONE (1<<10) /* Slave synchronized with new master. */
+#define SRI_FORCE_FAILOVER (1<<11) /* Force failover with master up. */
+#define SRI_SCRIPT_KILL_SENT (1<<12) /* SCRIPT KILL already sent on -BUSY */
/* Note: times are in milliseconds. */
#define SENTINEL_INFO_PERIOD 10000
@@ -115,27 +114,59 @@ typedef struct sentinelAddr {
#define SENTINEL_SCRIPT_MAX_RETRY 10
#define SENTINEL_SCRIPT_RETRY_DELAY 30000 /* 30 seconds between retries. */
-typedef struct sentinelRedisInstance {
- int flags; /* See SRI_... defines */
- char *name; /* Master name from the point of view of this sentinel. */
- char *runid; /* run ID of this instance. */
- uint64_t config_epoch; /* Configuration epoch. */
- sentinelAddr *addr; /* Master host. */
+/* SENTINEL SIMULATE-FAILURE command flags. */
+#define SENTINEL_SIMFAILURE_NONE 0
+#define SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION (1<<0)
+#define SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION (1<<1)
+
+/* The link to a sentinelRedisInstance. When we have the same set of Sentinels
+ * monitoring many masters, we have different instances representing the
+ * same Sentinels, one per master, and we need to share the hiredis connections
+ * among them. Oherwise if 5 Sentinels are monitoring 100 masters we create
+ * 500 outgoing connections instead of 5.
+ *
+ * So this structure represents a reference counted link in terms of the two
+ * hiredis connections for commands and Pub/Sub, and the fields needed for
+ * failure detection, since the ping/pong time are now local to the link: if
+ * the link is available, the instance is avaialbe. This way we don't just
+ * have 5 connections instead of 500, we also send 5 pings instead of 500.
+ *
+ * Links are shared only for Sentinels: master and slave instances have
+ * a link with refcount = 1, always. */
+typedef struct instanceLink {
+ int refcount; /* Number of sentinelRedisInstance owners. */
+ int disconnected; /* Non-zero if we need to reconnect cc or pc. */
+ int pending_commands; /* Number of commands sent waiting for a reply. */
redisAsyncContext *cc; /* Hiredis context for commands. */
redisAsyncContext *pc; /* Hiredis context for Pub / Sub. */
- int pending_commands; /* Number of commands sent waiting for a reply. */
mstime_t cc_conn_time; /* cc connection time. */
mstime_t pc_conn_time; /* pc connection time. */
mstime_t pc_last_activity; /* Last time we received any message. */
mstime_t last_avail_time; /* Last time the instance replied to ping with
a reply we consider valid. */
- mstime_t last_ping_time; /* Last time a pending ping was sent in the
- context of the current command connection
- with the instance. 0 if still not sent or
- if pong already received. */
+ mstime_t act_ping_time; /* Time at which the last pending ping (no pong
+ received after it) was sent. This field is
+ set to 0 when a pong is received, and set again
+ to the current time if the value is 0 and a new
+ ping is sent. */
+ mstime_t last_ping_time; /* Time at which we sent the last ping. This is
+ only used to avoid sending too many pings
+ during failure. Idle time is computed using
+ the act_ping_time field. */
mstime_t last_pong_time; /* Last time the instance replied to ping,
whatever the reply was. That's used to check
if the link is idle and must be reconnected. */
+ mstime_t last_reconn_time; /* Last reconnection attempt performed when
+ the link was down. */
+} instanceLink;
+
+typedef struct sentinelRedisInstance {
+ int flags; /* See SRI_... defines */
+ char *name; /* Master name from the point of view of this sentinel. */
+ char *runid; /* Run ID of this instance, or unique ID if is a Sentinel.*/
+ uint64_t config_epoch; /* Configuration epoch. */
+ sentinelAddr *addr; /* Master host. */
+ instanceLink *link; /* Link to the instance, may be shared for Sentinels. */
mstime_t last_pub_time; /* Last time we sent hello via Pub/Sub. */
mstime_t last_hello_time; /* Only used if SRI_SENTINEL is set. Last time
we received a hello from this Sentinel
@@ -195,19 +226,21 @@ typedef struct sentinelRedisInstance {
/* Main state. */
struct sentinelState {
- uint64_t current_epoch; /* Current epoch. */
+ char myid[REDIS_RUN_ID_SIZE+1]; /* This sentinel ID. */
+ uint64_t current_epoch; /* Current epoch. */
dict *masters; /* Dictionary of master sentinelRedisInstances.
Key is the instance name, value is the
sentinelRedisInstance structure pointer. */
int tilt; /* Are we in TILT mode? */
int running_scripts; /* Number of scripts in execution right now. */
- mstime_t tilt_start_time; /* When TITL started. */
- mstime_t previous_time; /* Last time we ran the time handler. */
- list *scripts_queue; /* Queue of user scripts to execute. */
- char *announce_ip; /* IP addr that is gossiped to other sentinels if
- not NULL. */
- int announce_port; /* Port that is gossiped to other sentinels if
- non zero. */
+ mstime_t tilt_start_time; /* When TITL started. */
+ mstime_t previous_time; /* Last time we ran the time handler. */
+ list *scripts_queue; /* Queue of user scripts to execute. */
+ char *announce_ip; /* IP addr that is gossiped to other sentinels if
+ not NULL. */
+ int announce_port; /* Port that is gossiped to other sentinels if
+ non zero. */
+ unsigned long simfailure_flags; /* Failures simulation. */
} sentinel;
/* A script execution job. */
@@ -327,8 +360,7 @@ sentinelRedisInstance *sentinelGetMasterByName(char *name);
char *sentinelGetSubjectiveLeader(sentinelRedisInstance *master);
char *sentinelGetObjectiveLeader(sentinelRedisInstance *master);
int yesnotoi(char *s);
-void sentinelDisconnectInstanceFromContext(const redisAsyncContext *c);
-void sentinelKillLink(sentinelRedisInstance *ri, redisAsyncContext *c);
+void instanceLinkConnectionError(const redisAsyncContext *c);
const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri);
void sentinelAbortFailover(sentinelRedisInstance *ri);
void sentinelEvent(int level, char *type, sentinelRedisInstance *ri, const char *fmt, ...);
@@ -342,6 +374,8 @@ void sentinelFlushConfig(void);
void sentinelGenerateInitialMonitorEvents(void);
int sentinelSendPing(sentinelRedisInstance *ri);
int sentinelForceHelloUpdateForMaster(sentinelRedisInstance *master);
+sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, char *ip, int port, char *runid);
+void sentinelSimFailureCrash(void);
/* ========================= Dictionary types =============================== */
@@ -398,6 +432,7 @@ struct redisCommand sentinelcmds[] = {
{"publish",sentinelPublishCommand,3,"",0,NULL,0,0,0,0,0},
{"info",sentinelInfoCommand,-1,"",0,NULL,0,0,0,0,0},
{"role",sentinelRoleCommand,1,"l",0,NULL,0,0,0,0,0},
+ {"client",clientCommand,-2,"rs",0,NULL,0,0,0,0,0},
{"shutdown",shutdownCommand,-1,"",0,NULL,0,0,0,0,0}
};
@@ -432,12 +467,14 @@ void initSentinel(void) {
sentinel.scripts_queue = listCreate();
sentinel.announce_ip = NULL;
sentinel.announce_port = 0;
+ sentinel.simfailure_flags = SENTINEL_SIMFAILURE_NONE;
+ memset(sentinel.myid,0,sizeof(sentinel.myid));
}
/* This function gets called when the server is in Sentinel mode, started,
* loaded the configuration, and is ready for normal operations. */
void sentinelIsRunning(void) {
- redisLog(REDIS_WARNING,"Sentinel runid is %s", server.runid);
+ int j;
if (server.configfile == NULL) {
redisLog(REDIS_WARNING,
@@ -450,6 +487,21 @@ void sentinelIsRunning(void) {
exit(1);
}
+ /* If this Sentinel has yet no ID set in the configuration file, we
+ * pick a random one and persist the config on disk. From now on this
+ * will be this Sentinel ID across restarts. */
+ for (j = 0; j < REDIS_RUN_ID_SIZE; j++)
+ if (sentinel.myid[j] != 0) break;
+
+ if (j == REDIS_RUN_ID_SIZE) {
+ /* Pick ID and presist the config. */
+ getRandomHexChars(sentinel.myid,REDIS_RUN_ID_SIZE);
+ sentinelFlushConfig();
+ }
+
+ /* Log its ID to make debugging of issues simpler. */
+ redisLog(REDIS_WARNING,"Sentinel ID is %s", sentinel.myid);
+
/* We want to generate a +monitor event for every configured master
* at startup. */
sentinelGenerateInitialMonitorEvents();
@@ -871,6 +923,194 @@ void sentinelCallClientReconfScript(sentinelRedisInstance *master, int role, cha
state, from->ip, fromport, to->ip, toport, NULL);
}
+/* =============================== instanceLink ============================= */
+
+/* Create a not yet connected link object. */
+instanceLink *createInstanceLink(void) {
+ instanceLink *link = zmalloc(sizeof(*link));
+
+ link->refcount = 1;
+ link->disconnected = 1;
+ link->pending_commands = 0;
+ link->cc = NULL;
+ link->pc = NULL;
+ link->cc_conn_time = 0;
+ link->pc_conn_time = 0;
+ link->last_reconn_time = 0;
+ link->pc_last_activity = 0;
+ /* We set the act_ping_time to "now" even if we actually don't have yet
+ * a connection with the node, nor we sent a ping.
+ * This is useful to detect a timeout in case we'll not be able to connect
+ * with the node at all. */
+ link->act_ping_time = mstime();
+ link->last_ping_time = 0;
+ link->last_avail_time = mstime();
+ link->last_pong_time = mstime();
+ return link;
+}
+
+/* Disconnect an hiredis connection in the context of an instance link. */
+void instanceLinkCloseConnection(instanceLink *link, redisAsyncContext *c) {
+ if (c == NULL) return;
+
+ if (link->cc == c) {
+ link->cc = NULL;
+ link->pending_commands = 0;
+ }
+ if (link->pc == c) link->pc = NULL;
+ c->data = NULL;
+ link->disconnected = 1;
+ redisAsyncFree(c);
+}
+
+/* Decrement the refcount of a link object, if it drops to zero, actually
+ * free it and return NULL. Otherwise don't do anything and return the pointer
+ * to the object.
+ *
+ * If we are not going to free the link and ri is not NULL, we rebind all the
+ * pending requests in link->cc (hiredis connection for commands) to a
+ * callback that will just ignore them. This is useful to avoid processing
+ * replies for an instance that no longer exists. */
+instanceLink *releaseInstanceLink(instanceLink *link, sentinelRedisInstance *ri)
+{
+ redisAssert(link->refcount > 0);
+ link->refcount--;
+ if (link->refcount != 0) {
+ if (ri && ri->link->cc) {
+ /* This instance may have pending callbacks in the hiredis async
+ * context, having as 'privdata' the instance that we are going to
+ * free. Let's rewrite the callback list, directly exploiting
+ * hiredis internal data structures, in order to bind them with
+ * a callback that will ignore the reply at all. */
+ redisCallback *cb;
+ redisCallbackList *callbacks = &link->cc->replies;
+
+ cb = callbacks->head;
+ while(cb) {
+ if (cb->privdata == ri) {
+ cb->fn = sentinelDiscardReplyCallback;
+ cb->privdata = NULL; /* Not strictly needed. */
+ }
+ cb = cb->next;
+ }
+ }
+ return link; /* Other active users. */
+ }
+
+ instanceLinkCloseConnection(link,link->cc);
+ instanceLinkCloseConnection(link,link->pc);
+ zfree(link);
+ return NULL;
+}
+
+/* This function will attempt to share the instance link we already have
+ * for the same Sentinel in the context of a different master, with the
+ * instance we are passing as argument.
+ *
+ * This way multiple Sentinel objects that refer all to the same physical
+ * Sentinel instance but in the context of different masters will use
+ * a single connection, will send a single PING per second for failure
+ * detection and so forth.
+ *
+ * Return REDIS_OK if a matching Sentinel was found in the context of a
+ * different master and sharing was performed. Otherwise REDIS_ERR
+ * is returned. */
+int sentinelTryConnectionSharing(sentinelRedisInstance *ri) {
+ redisAssert(ri->flags & SRI_SENTINEL);
+ dictIterator *di;
+ dictEntry *de;
+
+ if (ri->runid == NULL) return REDIS_ERR; /* No way to identify it. */
+ if (ri->link->refcount > 1) return REDIS_ERR; /* Already shared. */
+
+ di = dictGetIterator(sentinel.masters);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *master = dictGetVal(de), *match;
+ /* We want to share with the same physical Sentinel referenced
+ * in other masters, so skip our master. */
+ if (master == ri->master) continue;
+ match = getSentinelRedisInstanceByAddrAndRunID(master->sentinels,
+ NULL,0,ri->runid);
+ if (match == NULL) continue; /* No match. */
+ if (match == ri) continue; /* Should never happen but... safer. */
+
+ /* We identified a matching Sentinel, great! Let's free our link
+ * and use the one of the matching Sentinel. */
+ releaseInstanceLink(ri->link,NULL);
+ ri->link = match->link;
+ match->link->refcount++;
+ return REDIS_OK;
+ }
+ dictReleaseIterator(di);
+ return REDIS_ERR;
+}
+
+/* When we detect a Sentinel to switch address (reporting a different IP/port
+ * pair in Hello messages), let's update all the matching Sentinels in the
+ * context of other masters as well and disconnect the links, so that everybody
+ * will be updated.
+ *
+ * Return the number of updated Sentinel addresses. */
+int sentinelUpdateSentinelAddressInAllMasters(sentinelRedisInstance *ri) {
+ redisAssert(ri->flags & SRI_SENTINEL);
+ dictIterator *di;
+ dictEntry *de;
+ int reconfigured = 0;
+
+ di = dictGetIterator(sentinel.masters);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *master = dictGetVal(de), *match;
+ match = getSentinelRedisInstanceByAddrAndRunID(master->sentinels,
+ NULL,0,ri->runid);
+ if (match->link->disconnected == 0) {
+ instanceLinkCloseConnection(match->link,match->link->cc);
+ instanceLinkCloseConnection(match->link,match->link->pc);
+ }
+ if (match == ri) continue; /* Address already updated for it. */
+ /* Update the address of the matching Sentinel by copying the address
+ * of the Sentinel object that received the address update. */
+ releaseSentinelAddr(match->addr);
+ match->addr = dupSentinelAddr(ri->addr);
+ reconfigured++;
+ }
+ dictReleaseIterator(di);
+ if (reconfigured)
+ sentinelEvent(REDIS_NOTICE,"+sentinel-address-update", ri,
+ "%@ %d additional matching instances", reconfigured);
+ return reconfigured;
+}
+
+/* This function is called when an hiredis connection reported an error.
+ * We set it to NULL and mark the link as disconnected so that it will be
+ * reconnected again.
+ *
+ * Note: we don't free the hiredis context as hiredis will do it for us
+ * for async connections. */
+void instanceLinkConnectionError(const redisAsyncContext *c) {
+ instanceLink *link = c->data;
+ int pubsub;
+
+ if (!link) return;
+
+ pubsub = (link->pc == c);
+ if (pubsub)
+ link->pc = NULL;
+ else
+ link->cc = NULL;
+ link->disconnected = 1;
+}
+
+/* Hiredis connection established / disconnected callbacks. We need them
+ * just to cleanup our link state. */
+void sentinelLinkEstablishedCallback(const redisAsyncContext *c, int status) {
+ if (status != REDIS_OK) instanceLinkConnectionError(c);
+}
+
+void sentinelDisconnectCallback(const redisAsyncContext *c, int status) {
+ REDIS_NOTUSED(status);
+ instanceLinkConnectionError(c);
+}
+
/* ========================== sentinelRedisInstance ========================= */
/* Create a redis instance, the following fields must be populated by the
@@ -893,6 +1133,7 @@ void sentinelCallClientReconfScript(sentinelRedisInstance *master, int role, cha
*
* The function may also fail and return NULL with errno set to EBUSY if
* a master or slave with the same name already exists. */
+
sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *hostname, int port, int quorum, sentinelRedisInstance *master) {
sentinelRedisInstance *ri;
sentinelAddr *addr;
@@ -921,6 +1162,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
else if (flags & SRI_SENTINEL) table = master->sentinels;
sdsname = sdsnew(name);
if (dictFind(table,sdsname)) {
+ releaseSentinelAddr(addr);
sdsfree(sdsname);
errno = EBUSY;
return NULL;
@@ -930,24 +1172,12 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
ri = zmalloc(sizeof(*ri));
/* Note that all the instances are started in the disconnected state,
* the event loop will take care of connecting them. */
- ri->flags = flags | SRI_DISCONNECTED;
+ ri->flags = flags;
ri->name = sdsname;
ri->runid = NULL;
ri->config_epoch = 0;
ri->addr = addr;
- ri->cc = NULL;
- ri->pc = NULL;
- ri->pending_commands = 0;
- ri->cc_conn_time = 0;
- ri->pc_conn_time = 0;
- ri->pc_last_activity = 0;
- /* We set the last_ping_time to "now" even if we actually don't have yet
- * a connection with the node, nor we sent a ping.
- * This is useful to detect a timeout in case we'll not be able to connect
- * with the node at all. */
- ri->last_ping_time = mstime();
- ri->last_avail_time = mstime();
- ri->last_pong_time = mstime();
+ ri->link = createInstanceLink();
ri->last_pub_time = mstime();
ri->last_hello_time = mstime();
ri->last_master_down_reply_time = mstime();
@@ -1003,9 +1233,8 @@ void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
dictRelease(ri->sentinels);
dictRelease(ri->slaves);
- /* Release hiredis connections. */
- if (ri->cc) sentinelKillLink(ri,ri->cc);
- if (ri->pc) sentinelKillLink(ri,ri->pc);
+ /* Disconnect the instance. */
+ releaseInstanceLink(ri->link,ri);
/* Free other resources. */
sdsfree(ri->name);
@@ -1049,35 +1278,29 @@ const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri) {
else return "unknown";
}
-/* This function removes all the instances found in the dictionary of
- * sentinels in the specified 'master', having either:
- *
- * 1) The same ip/port as specified.
- * 2) The same runid.
+/* This function remove the Sentinel with the specified ID from the
+ * specified master.
*
- * "1" and "2" don't need to verify at the same time, just one is enough.
- * If "runid" is NULL it is not checked.
- * Similarly if "ip" is NULL it is not checked.
+ * If "runid" is NULL the function returns ASAP.
*
- * This function is useful because every time we add a new Sentinel into
- * a master's Sentinels dictionary, we want to be very sure about not
- * having duplicated instances for any reason. This is important because
- * other sentinels are needed to reach ODOWN quorum, and later to get
- * voted for a given configuration epoch in order to perform the failover.
+ * This function is useful because on Sentinels address switch, we want to
+ * remove our old entry and add a new one for the same ID but with the new
+ * address.
*
- * The function returns the number of Sentinels removed. */
-int removeMatchingSentinelsFromMaster(sentinelRedisInstance *master, char *ip, int port, char *runid) {
+ * The function returns 1 if the matching Sentinel was removed, otherwise
+ * 0 if there was no Sentinel with this ID. */
+int removeMatchingSentinelFromMaster(sentinelRedisInstance *master, char *runid) {
dictIterator *di;
dictEntry *de;
int removed = 0;
+ if (runid == NULL) return 0;
+
di = dictGetSafeIterator(master->sentinels);
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *ri = dictGetVal(de);
- if ((ri->runid && runid && strcmp(ri->runid,runid) == 0) ||
- (ip && strcmp(ri->addr->ip,ip) == 0 && port == ri->addr->port))
- {
+ if (ri->runid && strcmp(ri->runid,runid) == 0) {
dictDelete(master->sentinels,ri->name);
removed++;
}
@@ -1156,7 +1379,9 @@ void sentinelDelFlagsToDictOfRedisInstances(dict *instances, int flags) {
* 1) Remove all slaves.
* 2) Remove all sentinels.
* 3) Remove most of the flags resulting from runtime operations.
- * 4) Reset timers to their default value.
+ * 4) Reset timers to their default value. For example after a reset it will be
+ * possible to failover again the same master ASAP, without waiting the
+ * failover timeout delay.
* 5) In the process of doing this undo the failover if in progress.
* 6) Disconnect the connections with the master (will reconnect automatically).
*/
@@ -1170,24 +1395,25 @@ void sentinelResetMaster(sentinelRedisInstance *ri, int flags) {
dictRelease(ri->sentinels);
ri->sentinels = dictCreate(&instancesDictType,NULL);
}
- if (ri->cc) sentinelKillLink(ri,ri->cc);
- if (ri->pc) sentinelKillLink(ri,ri->pc);
- ri->flags &= SRI_MASTER|SRI_DISCONNECTED;
+ instanceLinkCloseConnection(ri->link,ri->link->cc);
+ instanceLinkCloseConnection(ri->link,ri->link->pc);
+ ri->flags &= SRI_MASTER;
if (ri->leader) {
sdsfree(ri->leader);
ri->leader = NULL;
}
ri->failover_state = SENTINEL_FAILOVER_STATE_NONE;
ri->failover_state_change_time = 0;
- ri->failover_start_time = 0;
+ ri->failover_start_time = 0; /* We can failover again ASAP. */
ri->promoted_slave = NULL;
sdsfree(ri->runid);
sdsfree(ri->slave_master_host);
ri->runid = NULL;
ri->slave_master_host = NULL;
- ri->last_ping_time = mstime();
- ri->last_avail_time = mstime();
- ri->last_pong_time = mstime();
+ ri->link->act_ping_time = mstime();
+ ri->link->last_ping_time = 0;
+ ri->link->last_avail_time = mstime();
+ ri->link->last_pong_time = mstime();
ri->role_reported_time = mstime();
ri->role_reported = SRI_MASTER;
if (flags & SENTINEL_GENERATE_EVENT)
@@ -1269,10 +1495,7 @@ int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip,
slave = createSentinelRedisInstance(NULL,SRI_SLAVE,slaves[j]->ip,
slaves[j]->port, master->quorum, master);
releaseSentinelAddr(slaves[j]);
- if (slave) {
- sentinelEvent(REDIS_NOTICE,"+slave",slave,"%@");
- sentinelFlushConfig();
- }
+ if (slave) sentinelEvent(REDIS_NOTICE,"+slave",slave,"%@");
}
zfree(slaves);
@@ -1330,6 +1553,13 @@ void sentinelPropagateDownAfterPeriod(sentinelRedisInstance *master) {
}
}
+char *sentinelGetInstanceTypeString(sentinelRedisInstance *ri) {
+ if (ri->flags & SRI_MASTER) return "master";
+ else if (ri->flags & SRI_SLAVE) return "slave";
+ else if (ri->flags & SRI_SENTINEL) return "sentinel";
+ else return "unknown";
+}
+
/* ============================ Config handling ============================= */
char *sentinelHandleConfiguration(char **argv, int argc) {
sentinelRedisInstance *ri;
@@ -1393,6 +1623,10 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
unsigned long long current_epoch = strtoull(argv[1],NULL,10);
if (current_epoch > sentinel.current_epoch)
sentinel.current_epoch = current_epoch;
+ } else if (!strcasecmp(argv[0],"myid") && argc == 2) {
+ if (strlen(argv[1]) != REDIS_RUN_ID_SIZE)
+ return "Malformed Sentinel id in myid option.";
+ memcpy(sentinel.myid,argv[1],REDIS_RUN_ID_SIZE);
} else if (!strcasecmp(argv[0],"config-epoch") && argc == 3) {
/* config-epoch <name> <epoch> */
ri = sentinelGetMasterByName(argv[1]);
@@ -1431,7 +1665,10 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
{
return "Wrong hostname or port for sentinel.";
}
- if (argc == 5) si->runid = sdsnew(argv[4]);
+ if (argc == 5) {
+ si->runid = sdsnew(argv[4]);
+ sentinelTryConnectionSharing(si);
+ }
} else if (!strcasecmp(argv[0],"announce-ip") && argc == 2) {
/* announce-ip <ip-address> */
if (strlen(argv[1]))
@@ -1455,6 +1692,10 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
dictEntry *de;
sds line;
+ /* sentinel unique ID. */
+ line = sdscatprintf(sdsempty(), "sentinel myid %s", sentinel.myid);
+ rewriteConfigRewriteLine(state,"sentinel",line,1);
+
/* For every master emit a "sentinel monitor" config entry. */
di = dictGetIterator(sentinel.masters);
while((de = dictNext(di)) != NULL) {
@@ -1546,7 +1787,7 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
slave_addr = master->addr;
line = sdscatprintf(sdsempty(),
"sentinel known-slave %s %s %d",
- master->name, ri->addr->ip, ri->addr->port);
+ master->name, slave_addr->ip, slave_addr->port);
rewriteConfigRewriteLine(state,"sentinel",line,1);
}
dictReleaseIterator(di2);
@@ -1616,57 +1857,6 @@ werr:
/* ====================== hiredis connection handling ======================= */
-/* Completely disconnect a hiredis link from an instance. */
-void sentinelKillLink(sentinelRedisInstance *ri, redisAsyncContext *c) {
- if (ri->cc == c) {
- ri->cc = NULL;
- ri->pending_commands = 0;
- }
- if (ri->pc == c) ri->pc = NULL;
- c->data = NULL;
- ri->flags |= SRI_DISCONNECTED;
- redisAsyncFree(c);
-}
-
-/* This function takes a hiredis context that is in an error condition
- * and make sure to mark the instance as disconnected performing the
- * cleanup needed.
- *
- * Note: we don't free the hiredis context as hiredis will do it for us
- * for async connections. */
-void sentinelDisconnectInstanceFromContext(const redisAsyncContext *c) {
- sentinelRedisInstance *ri = c->data;
- int pubsub;
-
- if (ri == NULL) return; /* The instance no longer exists. */
-
- pubsub = (ri->pc == c);
- sentinelEvent(REDIS_DEBUG, pubsub ? "-pubsub-link" : "-cmd-link", ri,
- "%@ #%s", c->errstr);
- if (pubsub)
- ri->pc = NULL;
- else
- ri->cc = NULL;
- ri->flags |= SRI_DISCONNECTED;
-}
-
-void sentinelLinkEstablishedCallback(const redisAsyncContext *c, int status) {
- if (status != REDIS_OK) {
- sentinelDisconnectInstanceFromContext(c);
- } else {
- sentinelRedisInstance *ri = c->data;
- int pubsub = (ri->pc == c);
-
- sentinelEvent(REDIS_DEBUG, pubsub ? "+pubsub-link" : "+cmd-link", ri,
- "%@");
- }
-}
-
-void sentinelDisconnectCallback(const redisAsyncContext *c, int status) {
- REDIS_NOTUSED(status);
- sentinelDisconnectInstanceFromContext(c);
-}
-
/* Send the AUTH command with the specified master password if needed.
* Note that for slaves the password set for the master is used.
*
@@ -1678,8 +1868,8 @@ void sentinelSendAuthIfNeeded(sentinelRedisInstance *ri, redisAsyncContext *c) {
ri->master->auth_pass;
if (auth_pass) {
- if (redisAsyncCommand(c, sentinelDiscardReplyCallback, NULL, "AUTH %s",
- auth_pass) == REDIS_OK) ri->pending_commands++;
+ if (redisAsyncCommand(c, sentinelDiscardReplyCallback, ri, "AUTH %s",
+ auth_pass) == REDIS_OK) ri->link->pending_commands++;
}
}
@@ -1692,77 +1882,82 @@ void sentinelSendAuthIfNeeded(sentinelRedisInstance *ri, redisAsyncContext *c) {
void sentinelSetClientName(sentinelRedisInstance *ri, redisAsyncContext *c, char *type) {
char name[64];
- snprintf(name,sizeof(name),"sentinel-%.8s-%s",server.runid,type);
- if (redisAsyncCommand(c, sentinelDiscardReplyCallback, NULL,
+ snprintf(name,sizeof(name),"sentinel-%.8s-%s",sentinel.myid,type);
+ if (redisAsyncCommand(c, sentinelDiscardReplyCallback, ri,
"CLIENT SETNAME %s", name) == REDIS_OK)
{
- ri->pending_commands++;
+ ri->link->pending_commands++;
}
}
-/* Create the async connections for the specified instance if the instance
- * is disconnected. Note that the SRI_DISCONNECTED flag is set even if just
+/* Create the async connections for the instance link if the link
+ * is disconnected. Note that link->disconnected is true even if just
* one of the two links (commands and pub/sub) is missing. */
void sentinelReconnectInstance(sentinelRedisInstance *ri) {
- if (!(ri->flags & SRI_DISCONNECTED)) return;
+ if (ri->link->disconnected == 0) return;
+ instanceLink *link = ri->link;
+ mstime_t now = mstime();
+
+ if (now - ri->link->last_reconn_time < SENTINEL_PING_PERIOD) return;
+ ri->link->last_reconn_time = now;
/* Commands connection. */
- if (ri->cc == NULL) {
- ri->cc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,REDIS_BIND_ADDR);
- if (ri->cc->err) {
+ if (link->cc == NULL) {
+ link->cc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,REDIS_BIND_ADDR);
+ if (link->cc->err) {
sentinelEvent(REDIS_DEBUG,"-cmd-link-reconnection",ri,"%@ #%s",
- ri->cc->errstr);
- sentinelKillLink(ri,ri->cc);
+ link->cc->errstr);
+ instanceLinkCloseConnection(link,link->cc);
} else {
- ri->cc_conn_time = mstime();
- ri->cc->data = ri;
- redisAeAttach(server.el,ri->cc);
- redisAsyncSetConnectCallback(ri->cc,
- sentinelLinkEstablishedCallback);
- redisAsyncSetDisconnectCallback(ri->cc,
- sentinelDisconnectCallback);
- sentinelSendAuthIfNeeded(ri,ri->cc);
- sentinelSetClientName(ri,ri->cc,"cmd");
+ link->cc_conn_time = mstime();
+ link->cc->data = link;
+ redisAeAttach(server.el,link->cc);
+ redisAsyncSetConnectCallback(link->cc,
+ sentinelLinkEstablishedCallback);
+ redisAsyncSetDisconnectCallback(link->cc,
+ sentinelDisconnectCallback);
+ sentinelSendAuthIfNeeded(ri,link->cc);
+ sentinelSetClientName(ri,link->cc,"cmd");
/* Send a PING ASAP when reconnecting. */
sentinelSendPing(ri);
}
}
/* Pub / Sub */
- if ((ri->flags & (SRI_MASTER|SRI_SLAVE)) && ri->pc == NULL) {
- ri->pc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,REDIS_BIND_ADDR);
- if (ri->pc->err) {
+ if ((ri->flags & (SRI_MASTER|SRI_SLAVE)) && link->pc == NULL) {
+ link->pc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,REDIS_BIND_ADDR);
+ if (link->pc->err) {
sentinelEvent(REDIS_DEBUG,"-pubsub-link-reconnection",ri,"%@ #%s",
- ri->pc->errstr);
- sentinelKillLink(ri,ri->pc);
+ link->pc->errstr);
+ instanceLinkCloseConnection(link,link->pc);
} else {
int retval;
- ri->pc_conn_time = mstime();
- ri->pc->data = ri;
- redisAeAttach(server.el,ri->pc);
- redisAsyncSetConnectCallback(ri->pc,
- sentinelLinkEstablishedCallback);
- redisAsyncSetDisconnectCallback(ri->pc,
- sentinelDisconnectCallback);
- sentinelSendAuthIfNeeded(ri,ri->pc);
- sentinelSetClientName(ri,ri->pc,"pubsub");
+ link->pc_conn_time = mstime();
+ link->pc->data = link;
+ redisAeAttach(server.el,link->pc);
+ redisAsyncSetConnectCallback(link->pc,
+ sentinelLinkEstablishedCallback);
+ redisAsyncSetDisconnectCallback(link->pc,
+ sentinelDisconnectCallback);
+ sentinelSendAuthIfNeeded(ri,link->pc);
+ sentinelSetClientName(ri,link->pc,"pubsub");
/* Now we subscribe to the Sentinels "Hello" channel. */
- retval = redisAsyncCommand(ri->pc,
- sentinelReceiveHelloMessages, NULL, "SUBSCRIBE %s",
+ retval = redisAsyncCommand(link->pc,
+ sentinelReceiveHelloMessages, ri, "SUBSCRIBE %s",
SENTINEL_HELLO_CHANNEL);
if (retval != REDIS_OK) {
/* If we can't subscribe, the Pub/Sub connection is useless
* and we can simply disconnect it and try again. */
- sentinelKillLink(ri,ri->pc);
+ instanceLinkCloseConnection(link,link->pc);
return;
}
}
}
- /* Clear the DISCONNECTED flags only if we have both the connections
+ /* Clear the disconnected status only if we have both the connections
* (or just the commands connection if this is a sentinel instance). */
- if (ri->cc && (ri->flags & SRI_SENTINEL || ri->pc))
- ri->flags &= ~SRI_DISCONNECTED;
+ if (link->cc && (ri->flags & SRI_SENTINEL || link->pc))
+ link->disconnected = 0;
}
/* ======================== Redis instances pinging ======================== */
@@ -1848,6 +2043,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
atoi(port), ri->quorum, ri)) != NULL)
{
sentinelEvent(REDIS_NOTICE,"+slave",slave,"%@");
+ sentinelFlushConfig();
}
}
}
@@ -1954,6 +2150,9 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
ri->master->failover_state_change_time = mstime();
sentinelFlushConfig();
sentinelEvent(REDIS_WARNING,"+promoted-slave",ri,"%@");
+ if (sentinel.simfailure_flags &
+ SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION)
+ sentinelSimFailureCrash();
sentinelEvent(REDIS_WARNING,"+failover-state-reconf-slaves",
ri->master,"%@");
sentinelCallClientReconfScript(ri->master,SENTINEL_LEADER,
@@ -2030,36 +2229,35 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
}
void sentinelInfoReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ sentinelRedisInstance *ri = privdata;
+ instanceLink *link = c->data;
redisReply *r;
- REDIS_NOTUSED(privdata);
- if (ri) ri->pending_commands--;
- if (!reply || !ri) return;
+ if (!reply || !link) return;
+ link->pending_commands--;
r = reply;
- if (r->type == REDIS_REPLY_STRING) {
+ if (r->type == REDIS_REPLY_STRING)
sentinelRefreshInstanceInfo(ri,r->str);
- }
}
/* Just discard the reply. We use this when we are not monitoring the return
* value of the command but its effects directly. */
void sentinelDiscardReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ instanceLink *link = c->data;
REDIS_NOTUSED(reply);
REDIS_NOTUSED(privdata);
- if (ri) ri->pending_commands--;
+ if (link) link->pending_commands--;
}
void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ sentinelRedisInstance *ri = privdata;
+ instanceLink *link = c->data;
redisReply *r;
- REDIS_NOTUSED(privdata);
- if (ri) ri->pending_commands--;
- if (!reply || !ri) return;
+ if (!reply || !link) return;
+ link->pending_commands--;
r = reply;
if (r->type == REDIS_REPLY_STATUS ||
@@ -2070,8 +2268,8 @@ void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata
strncmp(r->str,"LOADING",7) == 0 ||
strncmp(r->str,"MASTERDOWN",10) == 0)
{
- ri->last_avail_time = mstime();
- ri->last_ping_time = 0; /* Flag the pong as received. */
+ link->last_avail_time = mstime();
+ link->act_ping_time = 0; /* Flag the pong as received. */
} else {
/* Send a SCRIPT KILL command if the instance appears to be
* down because of a busy script. */
@@ -2079,26 +2277,26 @@ void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata
(ri->flags & SRI_S_DOWN) &&
!(ri->flags & SRI_SCRIPT_KILL_SENT))
{
- if (redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL,
+ if (redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri,
"SCRIPT KILL") == REDIS_OK)
- ri->pending_commands++;
+ ri->link->pending_commands++;
ri->flags |= SRI_SCRIPT_KILL_SENT;
}
}
}
- ri->last_pong_time = mstime();
+ link->last_pong_time = mstime();
}
/* This is called when we get the reply about the PUBLISH command we send
* to the master to advertise this sentinel. */
void sentinelPublishReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ sentinelRedisInstance *ri = privdata;
+ instanceLink *link = c->data;
redisReply *r;
- REDIS_NOTUSED(privdata);
- if (ri) ri->pending_commands--;
- if (!reply || !ri) return;
+ if (!reply || !link) return;
+ link->pending_commands--;
r = reply;
/* Only update pub_time if we actually published our message. Otherwise
@@ -2136,25 +2334,25 @@ void sentinelProcessHelloMessage(char *hello, int hello_len) {
if (!si) {
/* If not, remove all the sentinels that have the same runid
- * OR the same ip/port, because it's either a restart or a
- * network topology change. */
- removed = removeMatchingSentinelsFromMaster(master,token[0],port,
- token[2]);
+ * because there was an address change, and add the same Sentinel
+ * with the new address back. */
+ removed = removeMatchingSentinelFromMaster(master,token[2]);
if (removed) {
- sentinelEvent(REDIS_NOTICE,"-dup-sentinel",master,
- "%@ #duplicate of %s:%d or %s",
- token[0],port,token[2]);
+ sentinelEvent(REDIS_NOTICE,"+sentinel-address-switch",master,
+ "%@ ip %s port %d for %s", token[0],port,token[2]);
}
/* Add the new sentinel. */
si = createSentinelRedisInstance(NULL,SRI_SENTINEL,
token[0],port,master->quorum,master);
if (si) {
- sentinelEvent(REDIS_NOTICE,"+sentinel",si,"%@");
+ if (!removed) sentinelEvent(REDIS_NOTICE,"+sentinel",si,"%@");
/* The runid is NULL after a new instance creation and
* for Sentinels we don't have a later chance to fill it,
* so do it now. */
si->runid = sdsnew(token[2]);
+ sentinelTryConnectionSharing(si);
+ if (removed) sentinelUpdateSentinelAddressInAllMasters(si);
sentinelFlushConfig();
}
}
@@ -2203,9 +2401,9 @@ cleanup:
/* This is our Pub/Sub callback for the Hello channel. It's useful in order
* to discover other sentinels attached at the same master. */
void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ sentinelRedisInstance *ri = privdata;
redisReply *r;
- REDIS_NOTUSED(privdata);
+ REDIS_NOTUSED(c);
if (!reply || !ri) return;
r = reply;
@@ -2213,7 +2411,7 @@ void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privd
/* Update the last activity in the pubsub channel. Note that since we
* receive our messages as well this timestamp can be used to detect
* if the link is probably disconnected even if it seems otherwise. */
- ri->pc_last_activity = mstime();
+ ri->link->pc_last_activity = mstime();
/* Sanity check in the reply we expect, so that the code that follows
* can avoid to check for details. */
@@ -2225,7 +2423,7 @@ void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privd
strcmp(r->element[0]->str,"message") != 0) return;
/* We are not interested in meeting ourselves */
- if (strstr(r->element[2]->str,server.runid) != NULL) return;
+ if (strstr(r->element[2]->str,sentinel.myid) != NULL) return;
sentinelProcessHelloMessage(r->element[2]->str, r->element[2]->len);
}
@@ -2250,14 +2448,14 @@ int sentinelSendHello(sentinelRedisInstance *ri) {
sentinelRedisInstance *master = (ri->flags & SRI_MASTER) ? ri : ri->master;
sentinelAddr *master_addr = sentinelGetCurrentMasterAddress(master);
- if (ri->flags & SRI_DISCONNECTED) return REDIS_ERR;
+ if (ri->link->disconnected) return REDIS_ERR;
/* Use the specified announce address if specified, otherwise try to
* obtain our own IP address. */
if (sentinel.announce_ip) {
announce_ip = sentinel.announce_ip;
} else {
- if (anetSockName(ri->cc->c.fd,ip,sizeof(ip),NULL) == -1)
+ if (anetSockName(ri->link->cc->c.fd,ip,sizeof(ip),NULL) == -1)
return REDIS_ERR;
announce_ip = ip;
}
@@ -2268,16 +2466,16 @@ int sentinelSendHello(sentinelRedisInstance *ri) {
snprintf(payload,sizeof(payload),
"%s,%d,%s,%llu," /* Info about this sentinel. */
"%s,%s,%d,%llu", /* Info about current master. */
- announce_ip, announce_port, server.runid,
+ announce_ip, announce_port, sentinel.myid,
(unsigned long long) sentinel.current_epoch,
/* --- */
master->name,master_addr->ip,master_addr->port,
(unsigned long long) master->config_epoch);
- retval = redisAsyncCommand(ri->cc,
- sentinelPublishReplyCallback, NULL, "PUBLISH %s %s",
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelPublishReplyCallback, ri, "PUBLISH %s %s",
SENTINEL_HELLO_CHANNEL,payload);
if (retval != REDIS_OK) return REDIS_ERR;
- ri->pending_commands++;
+ ri->link->pending_commands++;
return REDIS_OK;
}
@@ -2313,20 +2511,22 @@ int sentinelForceHelloUpdateForMaster(sentinelRedisInstance *master) {
return REDIS_OK;
}
-/* Send a PING to the specified instance and refresh the last_ping_time
+/* Send a PING to the specified instance and refresh the act_ping_time
* if it is zero (that is, if we received a pong for the previous ping).
*
* On error zero is returned, and we can't consider the PING command
* queued in the connection. */
int sentinelSendPing(sentinelRedisInstance *ri) {
- int retval = redisAsyncCommand(ri->cc,
- sentinelPingReplyCallback, NULL, "PING");
+ int retval = redisAsyncCommand(ri->link->cc,
+ sentinelPingReplyCallback, ri, "PING");
if (retval == REDIS_OK) {
- ri->pending_commands++;
- /* We update the ping time only if we received the pong for
- * the previous ping, otherwise we are technically waiting
- * since the first ping that did not received a reply. */
- if (ri->last_ping_time == 0) ri->last_ping_time = mstime();
+ ri->link->pending_commands++;
+ ri->link->last_ping_time = mstime();
+ /* We update the active ping time only if we received the pong for
+ * the previous ping, otherwise we are technically waiting since the
+ * first ping that did not received a reply. */
+ if (ri->link->act_ping_time == 0)
+ ri->link->act_ping_time = ri->link->last_ping_time;
return 1;
} else {
return 0;
@@ -2342,7 +2542,7 @@ void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
/* Return ASAP if we have already a PING or INFO already pending, or
* in the case the instance is not properly connected. */
- if (ri->flags & SRI_DISCONNECTED) return;
+ if (ri->link->disconnected) return;
/* For INFO, PING, PUBLISH that are not critical commands to send we
* also have a limit of SENTINEL_MAX_PENDING_COMMANDS. We don't
@@ -2350,7 +2550,8 @@ void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
* properly (note that anyway there is a redundant protection about this,
* that is, the link will be disconnected and reconnected if a long
* timeout condition is detected. */
- if (ri->pending_commands >= SENTINEL_MAX_PENDING_COMMANDS) return;
+ if (ri->link->pending_commands >=
+ SENTINEL_MAX_PENDING_COMMANDS * ri->link->refcount) return;
/* If this is a slave of a master in O_DOWN condition we start sending
* it INFO every second, instead of the usual SENTINEL_INFO_PERIOD
@@ -2374,10 +2575,11 @@ void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
(now - ri->info_refresh) > info_period))
{
/* Send INFO to masters and slaves, not sentinels. */
- retval = redisAsyncCommand(ri->cc,
- sentinelInfoReplyCallback, NULL, "INFO");
- if (retval == REDIS_OK) ri->pending_commands++;
- } else if ((now - ri->last_pong_time) > ping_period) {
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelInfoReplyCallback, ri, "INFO");
+ if (retval == REDIS_OK) ri->link->pending_commands++;
+ } else if ((now - ri->link->last_pong_time) > ping_period &&
+ (now - ri->link->last_ping_time) > ping_period/2) {
/* Send PING to all the three kinds of instances. */
sentinelSendPing(ri);
} else if ((now - ri->last_pub_time) > SENTINEL_PUBLISH_PERIOD) {
@@ -2431,7 +2633,7 @@ void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
if (ri->flags & SRI_MASTER) flags = sdscat(flags,"master,");
if (ri->flags & SRI_SLAVE) flags = sdscat(flags,"slave,");
if (ri->flags & SRI_SENTINEL) flags = sdscat(flags,"sentinel,");
- if (ri->flags & SRI_DISCONNECTED) flags = sdscat(flags,"disconnected,");
+ if (ri->link->disconnected) flags = sdscat(flags,"disconnected,");
if (ri->flags & SRI_MASTER_DOWN) flags = sdscat(flags,"master_down,");
if (ri->flags & SRI_FAILOVER_IN_PROGRESS)
flags = sdscat(flags,"failover_in_progress,");
@@ -2445,8 +2647,12 @@ void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
sdsfree(flags);
fields++;
- addReplyBulkCString(c,"pending-commands");
- addReplyBulkLongLong(c,ri->pending_commands);
+ addReplyBulkCString(c,"link-pending-commands");
+ addReplyBulkLongLong(c,ri->link->pending_commands);
+ fields++;
+
+ addReplyBulkCString(c,"link-refcount");
+ addReplyBulkLongLong(c,ri->link->refcount);
fields++;
if (ri->flags & SRI_FAILOVER_IN_PROGRESS) {
@@ -2457,15 +2663,15 @@ void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
addReplyBulkCString(c,"last-ping-sent");
addReplyBulkLongLong(c,
- ri->last_ping_time ? (mstime() - ri->last_ping_time) : 0);
+ ri->link->act_ping_time ? (mstime() - ri->link->act_ping_time) : 0);
fields++;
addReplyBulkCString(c,"last-ok-ping-reply");
- addReplyBulkLongLong(c,mstime() - ri->last_avail_time);
+ addReplyBulkLongLong(c,mstime() - ri->link->last_avail_time);
fields++;
addReplyBulkCString(c,"last-ping-reply");
- addReplyBulkLongLong(c,mstime() - ri->last_pong_time);
+ addReplyBulkLongLong(c,mstime() - ri->link->last_pong_time);
fields++;
if (ri->flags & SRI_S_DOWN) {
@@ -2619,6 +2825,31 @@ sentinelRedisInstance *sentinelGetMasterByNameOrReplyError(redisClient *c,
return ri;
}
+#define SENTINEL_ISQR_OK 0
+#define SENTINEL_ISQR_NOQUORUM (1<<0)
+#define SENTINEL_ISQR_NOAUTH (1<<1)
+int sentinelIsQuorumReachable(sentinelRedisInstance *master, int *usableptr) {
+ dictIterator *di;
+ dictEntry *de;
+ int usable = 1; /* Number of usable Sentinels. Init to 1 to count myself. */
+ int result = SENTINEL_ISQR_OK;
+ int voters = dictSize(master->sentinels)+1; /* Known Sentinels + myself. */
+
+ di = dictGetIterator(master->sentinels);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *ri = dictGetVal(de);
+
+ if (ri->flags & (SRI_S_DOWN|SRI_O_DOWN)) continue;
+ usable++;
+ }
+ dictReleaseIterator(di);
+
+ if (usable < (int)master->quorum) result |= SENTINEL_ISQR_NOQUORUM;
+ if (usable < voters/2+1) result |= SENTINEL_ISQR_NOAUTH;
+ if (usableptr) *usableptr = usable;
+ return result;
+}
+
void sentinelCommand(redisClient *c) {
if (!strcasecmp(c->argv[1]->ptr,"masters")) {
/* SENTINEL MASTERS */
@@ -2649,7 +2880,23 @@ void sentinelCommand(redisClient *c) {
return;
addReplyDictOfRedisInstances(c,ri->sentinels);
} else if (!strcasecmp(c->argv[1]->ptr,"is-master-down-by-addr")) {
- /* SENTINEL IS-MASTER-DOWN-BY-ADDR <ip> <port> <current-epoch> <runid>*/
+ /* SENTINEL IS-MASTER-DOWN-BY-ADDR <ip> <port> <current-epoch> <runid>
+ *
+ * Arguments:
+ *
+ * ip and port are the ip and port of the master we want to be
+ * checked by Sentinel. Note that the command will not check by
+ * name but just by master, in theory different Sentinels may monitor
+ * differnet masters with the same name.
+ *
+ * current-epoch is needed in order to understand if we are allowed
+ * to vote for a failover leader or not. Each Sentinel can vote just
+ * one time per epoch.
+ *
+ * runid is "*" if we are not seeking for a vote from the Sentinel
+ * in order to elect the failover leader. Otherwise it is set to the
+ * runid we want the Sentinel to vote if it did not already voted.
+ */
sentinelRedisInstance *ri;
long long req_epoch;
uint64_t leader_epoch = 0;
@@ -2775,6 +3022,10 @@ void sentinelCommand(redisClient *c) {
sentinelEvent(REDIS_WARNING,"+monitor",ri,"%@ quorum %d",ri->quorum);
addReply(c,shared.ok);
}
+ } else if (!strcasecmp(c->argv[1]->ptr,"flushconfig")) {
+ sentinelFlushConfig();
+ addReply(c,shared.ok);
+ return;
} else if (!strcasecmp(c->argv[1]->ptr,"remove")) {
/* SENTINEL REMOVE <name> */
sentinelRedisInstance *ri;
@@ -2785,10 +3036,37 @@ void sentinelCommand(redisClient *c) {
dictDelete(sentinel.masters,c->argv[2]->ptr);
sentinelFlushConfig();
addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"ckquorum")) {
+ /* SENTINEL CKQUORUM <name> */
+ sentinelRedisInstance *ri;
+ int usable;
+
+ if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
+ == NULL) return;
+ int result = sentinelIsQuorumReachable(ri,&usable);
+ if (result == SENTINEL_ISQR_OK) {
+ addReplySds(c, sdscatfmt(sdsempty(),
+ "+OK %i usable Sentinels. Quorum and failover authorization "
+ "can be reached\r\n",usable));
+ } else {
+ sds e = sdscatfmt(sdsempty(),
+ "-NOQUORUM %i usable Sentinels. ",usable);
+ if (result & SENTINEL_ISQR_NOQUORUM)
+ e = sdscat(e,"Not enough available Sentinels to reach the"
+ " specified quorum for this master");
+ if (result & SENTINEL_ISQR_NOAUTH) {
+ if (result & SENTINEL_ISQR_NOQUORUM) e = sdscat(e,". ");
+ e = sdscat(e, "Not enough available Sentinels to reach the"
+ " majority and authorize a failover");
+ }
+ e = sdscat(e,"\r\n");
+ addReplySds(c,e);
+ }
} else if (!strcasecmp(c->argv[1]->ptr,"set")) {
if (c->argc < 3 || c->argc % 2 == 0) goto numargserr;
sentinelSetCommand(c);
} else if (!strcasecmp(c->argv[1]->ptr,"info-cache")) {
+ /* SENTINEL INFO-CACHE <name> */
if (c->argc < 2) goto numargserr;
mstime_t now = mstime();
@@ -2849,6 +3127,33 @@ void sentinelCommand(redisClient *c) {
}
dictReleaseIterator(di);
if (masters_local != sentinel.masters) dictRelease(masters_local);
+ } else if (!strcasecmp(c->argv[1]->ptr,"simulate-failure")) {
+ /* SENTINEL SIMULATE-FAILURE <flag> <flag> ... <flag> */
+ int j;
+
+ sentinel.simfailure_flags = SENTINEL_SIMFAILURE_NONE;
+ for (j = 2; j < c->argc; j++) {
+ if (!strcasecmp(c->argv[j]->ptr,"crash-after-election")) {
+ sentinel.simfailure_flags |=
+ SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION;
+ redisLog(REDIS_WARNING,"Failure simulation: this Sentinel "
+ "will crash after being successfully elected as failover "
+ "leader");
+ } else if (!strcasecmp(c->argv[j]->ptr,"crash-after-promotion")) {
+ sentinel.simfailure_flags |=
+ SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION;
+ redisLog(REDIS_WARNING,"Failure simulation: this Sentinel "
+ "will crash after promoting the selected slave to master");
+ } else if (!strcasecmp(c->argv[j]->ptr,"help")) {
+ addReplyMultiBulkLen(c,2);
+ addReplyBulkCString(c,"crash-after-election");
+ addReplyBulkCString(c,"crash-after-promotion");
+ } else {
+ addReplyError(c,"Unknown failure simulation specified");
+ return;
+ }
+ }
+ addReply(c,shared.ok);
} else {
addReplyErrorFormat(c,"Unknown sentinel subcommand '%s'",
(char*)c->argv[1]->ptr);
@@ -2896,11 +3201,13 @@ void sentinelInfoCommand(redisClient *c) {
"sentinel_masters:%lu\r\n"
"sentinel_tilt:%d\r\n"
"sentinel_running_scripts:%d\r\n"
- "sentinel_scripts_queue_length:%ld\r\n",
+ "sentinel_scripts_queue_length:%ld\r\n"
+ "sentinel_simulate_failure_flags:%lu\r\n",
dictSize(sentinel.masters),
sentinel.tilt,
sentinel.running_scripts,
- listLength(sentinel.scripts_queue));
+ listLength(sentinel.scripts_queue),
+ sentinel.simfailure_flags);
di = dictGetIterator(sentinel.masters);
while((de = dictNext(di)) != NULL) {
@@ -3051,8 +3358,8 @@ void sentinelPublishCommand(redisClient *c) {
void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
mstime_t elapsed = 0;
- if (ri->last_ping_time)
- elapsed = mstime() - ri->last_ping_time;
+ if (ri->link->act_ping_time)
+ elapsed = mstime() - ri->link->act_ping_time;
/* Check if we are in need for a reconnection of one of the
* links, because we are detecting low activity.
@@ -3060,15 +3367,16 @@ void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
* 1) Check if the command link seems connected, was connected not less
* than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have a
* pending ping for more than half the timeout. */
- if (ri->cc &&
- (mstime() - ri->cc_conn_time) > SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
- ri->last_ping_time != 0 && /* Ther is a pending ping... */
+ if (ri->link->cc &&
+ (mstime() - ri->link->cc_conn_time) >
+ SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
+ ri->link->act_ping_time != 0 && /* Ther is a pending ping... */
/* The pending ping is delayed, and we did not received
* error replies as well. */
- (mstime() - ri->last_ping_time) > (ri->down_after_period/2) &&
- (mstime() - ri->last_pong_time) > (ri->down_after_period/2))
+ (mstime() - ri->link->act_ping_time) > (ri->down_after_period/2) &&
+ (mstime() - ri->link->last_pong_time) > (ri->down_after_period/2))
{
- sentinelKillLink(ri,ri->cc);
+ instanceLinkCloseConnection(ri->link,ri->link->cc);
}
/* 2) Check if the pubsub link seems connected, was connected not less
@@ -3076,11 +3384,12 @@ void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
* activity in the Pub/Sub channel for more than
* SENTINEL_PUBLISH_PERIOD * 3.
*/
- if (ri->pc &&
- (mstime() - ri->pc_conn_time) > SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
- (mstime() - ri->pc_last_activity) > (SENTINEL_PUBLISH_PERIOD*3))
+ if (ri->link->pc &&
+ (mstime() - ri->link->pc_conn_time) >
+ SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
+ (mstime() - ri->link->pc_last_activity) > (SENTINEL_PUBLISH_PERIOD*3))
{
- sentinelKillLink(ri,ri->pc);
+ instanceLinkCloseConnection(ri->link,ri->link->pc);
}
/* Update the SDOWN flag. We believe the instance is SDOWN if:
@@ -3154,12 +3463,12 @@ void sentinelCheckObjectivelyDown(sentinelRedisInstance *master) {
/* Receive the SENTINEL is-master-down-by-addr reply, see the
* sentinelAskMasterStateToOtherSentinels() function for more information. */
void sentinelReceiveIsMasterDownReply(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ sentinelRedisInstance *ri = privdata;
+ instanceLink *link = c->data;
redisReply *r;
- REDIS_NOTUSED(privdata);
- if (ri) ri->pending_commands--;
- if (!reply || !ri) return;
+ if (!reply || !link) return;
+ link->pending_commands--;
r = reply;
/* Ignore every error or unexpected reply.
@@ -3220,27 +3529,34 @@ void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int f
* 2) Sentinel is connected.
* 3) We did not received the info within SENTINEL_ASK_PERIOD ms. */
if ((master->flags & SRI_S_DOWN) == 0) continue;
- if (ri->flags & SRI_DISCONNECTED) continue;
+ if (ri->link->disconnected) continue;
if (!(flags & SENTINEL_ASK_FORCED) &&
mstime() - ri->last_master_down_reply_time < SENTINEL_ASK_PERIOD)
continue;
/* Ask */
ll2string(port,sizeof(port),master->addr->port);
- retval = redisAsyncCommand(ri->cc,
- sentinelReceiveIsMasterDownReply, NULL,
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelReceiveIsMasterDownReply, ri,
"SENTINEL is-master-down-by-addr %s %s %llu %s",
master->addr->ip, port,
sentinel.current_epoch,
(master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?
- server.runid : "*");
- if (retval == REDIS_OK) ri->pending_commands++;
+ sentinel.myid : "*");
+ if (retval == REDIS_OK) ri->link->pending_commands++;
}
dictReleaseIterator(di);
}
/* =============================== FAILOVER ================================= */
+/* Crash because of user request via SENTINEL simulate-failure command. */
+void sentinelSimFailureCrash(void) {
+ redisLog(REDIS_WARNING,
+ "Sentinel CRASH because of SENTINEL simulate-failure");
+ exit(99);
+}
+
/* Vote for the sentinel with 'req_runid' or return the old vote if already
* voted for the specifed 'req_epoch' or one greater.
*
@@ -3265,7 +3581,7 @@ char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char
/* If we did not voted for ourselves, set the master failover start
* time to now, in order to force a delay before we can start a
* failover for the same master. */
- if (strcasecmp(master->leader,server.runid))
+ if (strcasecmp(master->leader,sentinel.myid))
master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC;
}
@@ -3346,7 +3662,7 @@ char *sentinelGetLeader(sentinelRedisInstance *master, uint64_t epoch) {
if (winner)
myvote = sentinelVoteLeader(master,epoch,winner,&leader_epoch);
else
- myvote = sentinelVoteLeader(master,epoch,server.runid,&leader_epoch);
+ myvote = sentinelVoteLeader(master,epoch,sentinel.myid,&leader_epoch);
if (myvote && leader_epoch == epoch) {
uint64_t votes = sentinelLeaderIncr(counters,myvote);
@@ -3400,35 +3716,35 @@ int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
*
* Note that we don't check the replies returned by commands, since we
* will observe instead the effects in the next INFO output. */
- retval = redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL, "MULTI");
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri, "MULTI");
if (retval == REDIS_ERR) return retval;
- ri->pending_commands++;
+ ri->link->pending_commands++;
- retval = redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s", host, portstr);
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri, "SLAVEOF %s %s", host, portstr);
if (retval == REDIS_ERR) return retval;
- ri->pending_commands++;
+ ri->link->pending_commands++;
- retval = redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL, "CONFIG REWRITE");
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri, "CONFIG REWRITE");
if (retval == REDIS_ERR) return retval;
- ri->pending_commands++;
+ ri->link->pending_commands++;
/* CLIENT KILL TYPE <type> is only supported starting from Redis 2.8.12,
* however sending it to an instance not understanding this command is not
* an issue because CLIENT is variadic command, so Redis will not
* recognized as a syntax error, and the transaction will not fail (but
* only the unsupported command will fail). */
- retval = redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL, "CLIENT KILL TYPE normal");
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri, "CLIENT KILL TYPE normal");
if (retval == REDIS_ERR) return retval;
- ri->pending_commands++;
+ ri->link->pending_commands++;
- retval = redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL, "EXEC");
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri, "EXEC");
if (retval == REDIS_ERR) return retval;
- ri->pending_commands++;
+ ri->link->pending_commands++;
return REDIS_OK;
}
@@ -3566,8 +3882,9 @@ sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master) {
sentinelRedisInstance *slave = dictGetVal(de);
mstime_t info_validity_time;
- if (slave->flags & (SRI_S_DOWN|SRI_O_DOWN|SRI_DISCONNECTED)) continue;
- if (mstime() - slave->last_avail_time > SENTINEL_PING_PERIOD*5) continue;
+ if (slave->flags & (SRI_S_DOWN|SRI_O_DOWN)) continue;
+ if (slave->link->disconnected) continue;
+ if (mstime() - slave->link->last_avail_time > SENTINEL_PING_PERIOD*5) continue;
if (slave->slave_priority == 0) continue;
/* If the master is in SDOWN state we get INFO for slaves every second.
@@ -3598,7 +3915,7 @@ void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
/* Check if we are the leader for the failover epoch. */
leader = sentinelGetLeader(ri, ri->failover_epoch);
- isleader = leader && strcasecmp(leader,server.runid) == 0;
+ isleader = leader && strcasecmp(leader,sentinel.myid) == 0;
sdsfree(leader);
/* If I'm not the leader, and it is not a forced failover via
@@ -3618,6 +3935,8 @@ void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
return;
}
sentinelEvent(REDIS_WARNING,"+elected-leader",ri,"%@");
+ if (sentinel.simfailure_flags & SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION)
+ sentinelSimFailureCrash();
ri->failover_state = SENTINEL_FAILOVER_STATE_SELECT_SLAVE;
ri->failover_state_change_time = mstime();
sentinelEvent(REDIS_WARNING,"+failover-state-select-slave",ri,"%@");
@@ -3648,7 +3967,7 @@ void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) {
/* We can't send the command to the promoted slave if it is now
* disconnected. Retry again and again with this state until the timeout
* is reached, then abort the failover. */
- if (ri->promoted_slave->flags & SRI_DISCONNECTED) {
+ if (ri->link->disconnected) {
if (mstime() - ri->failover_state_change_time > ri->failover_timeout) {
sentinelEvent(REDIS_WARNING,"-failover-abort-slave-timeout",ri,"%@");
sentinelAbortFailover(ri);
@@ -3727,8 +4046,8 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *master) {
sentinelRedisInstance *slave = dictGetVal(de);
int retval;
- if (slave->flags &
- (SRI_RECONF_DONE|SRI_RECONF_SENT|SRI_DISCONNECTED)) continue;
+ if (slave->flags & (SRI_RECONF_DONE|SRI_RECONF_SENT)) continue;
+ if (slave->link->disconnected) continue;
retval = sentinelSendSlaveOf(slave,
master->promoted_slave->addr->ip,
@@ -3783,8 +4102,8 @@ void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) {
/* Nothing to do for instances that are disconnected or already
* in RECONF_SENT state. */
- if (slave->flags & (SRI_DISCONNECTED|SRI_RECONF_SENT|SRI_RECONF_INPROG))
- continue;
+ if (slave->flags & (SRI_RECONF_SENT|SRI_RECONF_INPROG)) continue;
+ if (slave->link->disconnected) continue;
/* Send SLAVEOF <new master>. */
retval = sentinelSendSlaveOf(slave,
diff --git a/src/sha1.c b/src/sha1.c
index 7f73b40d3..ce487e367 100644
--- a/src/sha1.c
+++ b/src/sha1.c
@@ -23,7 +23,7 @@ A million repetitions of "a"
#include <stdio.h>
#include <string.h>
-#include <sys/types.h> /* for u_int*_t */
+#include <stdint.h>
#include "solarisfixes.h"
#include "sha1.h"
#include "config.h"
@@ -53,12 +53,12 @@ A million repetitions of "a"
/* Hash a single 512-bit block. This is the core of the algorithm. */
-void SHA1Transform(u_int32_t state[5], const unsigned char buffer[64])
+void SHA1Transform(uint32_t state[5], const unsigned char buffer[64])
{
- u_int32_t a, b, c, d, e;
+ uint32_t a, b, c, d, e;
typedef union {
unsigned char c[64];
- u_int32_t l[16];
+ uint32_t l[16];
} CHAR64LONG16;
#ifdef SHA1HANDSOFF
CHAR64LONG16 block[1]; /* use array to appear as a pointer */
@@ -128,9 +128,9 @@ void SHA1Init(SHA1_CTX* context)
/* Run your data through this. */
-void SHA1Update(SHA1_CTX* context, const unsigned char* data, u_int32_t len)
+void SHA1Update(SHA1_CTX* context, const unsigned char* data, uint32_t len)
{
- u_int32_t i, j;
+ uint32_t i, j;
j = context->count[0];
if ((context->count[0] += len << 3) < j)
@@ -168,7 +168,7 @@ void SHA1Final(unsigned char digest[20], SHA1_CTX* context)
for (i = 0; i < 2; i++)
{
- u_int32_t t = context->count[i];
+ uint32_t t = context->count[i];
int j;
for (j = 0; j < 4; t >>= 8, j++)
diff --git a/src/sha1.h b/src/sha1.h
index 4c76d19da..f41691258 100644
--- a/src/sha1.h
+++ b/src/sha1.h
@@ -8,14 +8,14 @@ By Steve Reid <steve@edmweb.com>
*/
typedef struct {
- u_int32_t state[5];
- u_int32_t count[2];
+ uint32_t state[5];
+ uint32_t count[2];
unsigned char buffer[64];
} SHA1_CTX;
-void SHA1Transform(u_int32_t state[5], const unsigned char buffer[64]);
+void SHA1Transform(uint32_t state[5], const unsigned char buffer[64]);
void SHA1Init(SHA1_CTX* context);
-void SHA1Update(SHA1_CTX* context, const unsigned char* data, u_int32_t len);
+void SHA1Update(SHA1_CTX* context, const unsigned char* data, uint32_t len);
void SHA1Final(unsigned char digest[20], SHA1_CTX* context);
#ifdef REDIS_TEST
diff --git a/src/sort.c b/src/sort.c
index c1b571313..7da4de152 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -209,7 +209,7 @@ void sortCommand(redisClient *c) {
}
/* Create a list of operations to perform for every sorted element.
- * Operations can be GET/DEL/INCR/DECR */
+ * Operations can be GET */
operations = listCreate();
listSetFreeMethod(operations,zfree);
j = 2; /* options start at argv[2] */
diff --git a/src/t_set.c b/src/t_set.c
index c8141c3f6..e8ce783c0 100644
--- a/src/t_set.c
+++ b/src/t_set.c
@@ -144,7 +144,11 @@ void setTypeReleaseIterator(setTypeIterator *si) {
* Since set elements can be internally be stored as redis objects or
* simple arrays of integers, setTypeNext returns the encoding of the
* set object you are iterating, and will populate the appropriate pointer
- * (eobj) or (llobj) accordingly.
+ * (objele) or (llele) accordingly.
+ *
+ * Note that both the objele and llele pointers should be passed and cannot
+ * be NULL since the function will try to defensively populate the non
+ * used field with values which are easy to trap if misused.
*
* When there are no longer elements -1 is returned.
* Returned objects ref count is not incremented, so this function is
@@ -154,9 +158,13 @@ int setTypeNext(setTypeIterator *si, robj **objele, int64_t *llele) {
dictEntry *de = dictNext(si->di);
if (de == NULL) return -1;
*objele = dictGetKey(de);
+ *llele = -123456789; /* Not needed. Defensive. */
} else if (si->encoding == REDIS_ENCODING_INTSET) {
if (!intsetGet(si->subject->ptr,si->ii++,llele))
return -1;
+ *objele = NULL; /* Not needed. Defensive. */
+ } else {
+ redisPanic("Wrong set encoding in setTypeNext");
}
return si->encoding;
}
@@ -197,6 +205,10 @@ robj *setTypeNextObject(setTypeIterator *si) {
* field of the object and is used by the caller to check if the
* int64_t pointer or the redis object pointer was populated.
*
+ * Note that both the objele and llele pointers should be passed and cannot
+ * be NULL since the function will try to defensively populate the non
+ * used field with values which are easy to trap if misused.
+ *
* When an object is returned (the set was a real set) the ref count
* of the object is not incremented so this function can be considered
* copy on write friendly. */
@@ -204,8 +216,10 @@ int setTypeRandomElement(robj *setobj, robj **objele, int64_t *llele) {
if (setobj->encoding == REDIS_ENCODING_HT) {
dictEntry *de = dictGetRandomKey(setobj->ptr);
*objele = dictGetKey(de);
+ *llele = -123456789; /* Not needed. Defensive. */
} else if (setobj->encoding == REDIS_ENCODING_INTSET) {
*llele = intsetRandom(setobj->ptr);
+ *objele = NULL; /* Not needed. Defensive. */
} else {
redisPanic("Unknown set encoding");
}
@@ -240,7 +254,7 @@ void setTypeConvert(robj *setobj, int enc) {
/* To add the elements we extract integers and create redis objects */
si = setTypeInitIterator(setobj);
- while (setTypeNext(si,NULL,&intele) != -1) {
+ while (setTypeNext(si,&element,&intele) != -1) {
element = createStringObjectFromLongLong(intele);
redisAssertWithInfo(NULL,element,
dictAdd(d,element,NULL) == DICT_OK);
@@ -329,7 +343,7 @@ void smoveCommand(redisClient *c) {
/* If srcset and dstset are equal, SMOVE is a no-op */
if (srcset == dstset) {
- addReply(c,shared.cone);
+ addReply(c,setTypeIsMember(srcset,ele) ? shared.cone : shared.czero);
return;
}
diff --git a/src/t_zset.c b/src/t_zset.c
index 64418c9b4..386258da1 100644
--- a/src/t_zset.c
+++ b/src/t_zset.c
@@ -213,7 +213,7 @@ static int zslValueGteMin(double value, zrangespec *spec) {
return spec->minex ? (value > spec->min) : (value >= spec->min);
}
-static int zslValueLteMax(double value, zrangespec *spec) {
+int zslValueLteMax(double value, zrangespec *spec) {
return spec->maxex ? (value < spec->max) : (value <= spec->max);
}
@@ -1166,40 +1166,109 @@ void zsetConvert(robj *zobj, int encoding) {
}
}
+/* Return (by reference) the score of the specified member of the sorted set
+ * storing it into *score. If the element does not exist REDIS_ERR is returned
+ * otherwise REDIS_OK is returned and *score is correctly populated.
+ * If 'zobj' or 'member' is NULL, REDIS_ERR is returned. */
+int zsetScore(robj *zobj, robj *member, double *score) {
+ if (!zobj || !member) return REDIS_ERR;
+
+ if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zzlFind(zobj->ptr, member, score) == NULL) return REDIS_ERR;
+ } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ zset *zs = zobj->ptr;
+ dictEntry *de = dictFind(zs->dict, member);
+ if (de == NULL) return REDIS_ERR;
+ *score = *(double*)dictGetVal(de);
+ } else {
+ redisPanic("Unknown sorted set encoding");
+ }
+ return REDIS_OK;
+}
+
/*-----------------------------------------------------------------------------
* Sorted set commands
*----------------------------------------------------------------------------*/
/* This generic command implements both ZADD and ZINCRBY. */
-void zaddGenericCommand(redisClient *c, int incr) {
+#define ZADD_NONE 0
+#define ZADD_INCR (1<<0) /* Increment the score instead of setting it. */
+#define ZADD_NX (1<<1) /* Don't touch elements not already existing. */
+#define ZADD_XX (1<<2) /* Only touch elements already exisitng. */
+#define ZADD_CH (1<<3) /* Return num of elements added or updated. */
+void zaddGenericCommand(redisClient *c, int flags) {
static char *nanerr = "resulting score is not a number (NaN)";
robj *key = c->argv[1];
robj *ele;
robj *zobj;
robj *curobj;
double score = 0, *scores = NULL, curscore = 0.0;
- int j, elements = (c->argc-2)/2;
- int added = 0, updated = 0;
-
- if (c->argc % 2) {
+ int j, elements;
+ int scoreidx = 0;
+ /* The following vars are used in order to track what the command actually
+ * did during the execution, to reply to the client and to trigger the
+ * notification of keyspace change. */
+ int added = 0; /* Number of new elements added. */
+ int updated = 0; /* Number of elements with updated score. */
+ int processed = 0; /* Number of elements processed, may remain zero with
+ options like XX. */
+
+ /* Parse options. At the end 'scoreidx' is set to the argument position
+ * of the score of the first score-element pair. */
+ scoreidx = 2;
+ while(scoreidx < c->argc) {
+ char *opt = c->argv[scoreidx]->ptr;
+ if (!strcasecmp(opt,"nx")) flags |= ZADD_NX;
+ else if (!strcasecmp(opt,"xx")) flags |= ZADD_XX;
+ else if (!strcasecmp(opt,"ch")) flags |= ZADD_CH;
+ else if (!strcasecmp(opt,"incr")) flags |= ZADD_INCR;
+ else break;
+ scoreidx++;
+ }
+
+ /* Turn options into simple to check vars. */
+ int incr = (flags & ZADD_INCR) != 0;
+ int nx = (flags & ZADD_NX) != 0;
+ int xx = (flags & ZADD_XX) != 0;
+ int ch = (flags & ZADD_CH) != 0;
+
+ /* After the options, we expect to have an even number of args, since
+ * we expect any number of score-element pairs. */
+ elements = c->argc-scoreidx;
+ if (elements % 2) {
addReply(c,shared.syntaxerr);
return;
}
+ elements /= 2; /* Now this holds the number of score-element pairs. */
+
+ /* Check for incompatible options. */
+ if (nx && xx) {
+ addReplyError(c,
+ "XX and NX options at the same time are not compatible");
+ return;
+ }
+
+ if (incr && elements > 1) {
+ addReplyError(c,
+ "INCR option supports a single increment-element pair");
+ return;
+ }
/* Start parsing all the scores, we need to emit any syntax error
* before executing additions to the sorted set, as the command should
* either execute fully or nothing at all. */
scores = zmalloc(sizeof(double)*elements);
for (j = 0; j < elements; j++) {
- if (getDoubleFromObjectOrReply(c,c->argv[2+j*2],&scores[j],NULL)
+ if (getDoubleFromObjectOrReply(c,c->argv[scoreidx+j*2],&scores[j],NULL)
!= REDIS_OK) goto cleanup;
}
/* Lookup the key and create the sorted set if does not exist. */
zobj = lookupKeyWrite(c->db,key);
if (zobj == NULL) {
+ if (xx) goto reply_to_client; /* No key + XX option: nothing to do. */
if (server.zset_max_ziplist_entries == 0 ||
- server.zset_max_ziplist_value < sdslen(c->argv[3]->ptr))
+ server.zset_max_ziplist_value < sdslen(c->argv[scoreidx+1]->ptr))
{
zobj = createZsetObject();
} else {
@@ -1220,8 +1289,9 @@ void zaddGenericCommand(redisClient *c, int incr) {
unsigned char *eptr;
/* Prefer non-encoded element when dealing with ziplists. */
- ele = c->argv[3+j*2];
+ ele = c->argv[scoreidx+1+j*2];
if ((eptr = zzlFind(zobj->ptr,ele,&curscore)) != NULL) {
+ if (nx) continue;
if (incr) {
score += curscore;
if (isnan(score)) {
@@ -1237,7 +1307,8 @@ void zaddGenericCommand(redisClient *c, int incr) {
server.dirty++;
updated++;
}
- } else {
+ processed++;
+ } else if (!xx) {
/* Optimize: check if the element is too large or the list
* becomes too long *before* executing zzlInsert. */
zobj->ptr = zzlInsert(zobj->ptr,ele,score);
@@ -1247,15 +1318,18 @@ void zaddGenericCommand(redisClient *c, int incr) {
zsetConvert(zobj,REDIS_ENCODING_SKIPLIST);
server.dirty++;
added++;
+ processed++;
}
} else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
zskiplistNode *znode;
dictEntry *de;
- ele = c->argv[3+j*2] = tryObjectEncoding(c->argv[3+j*2]);
+ ele = c->argv[scoreidx+1+j*2] =
+ tryObjectEncoding(c->argv[scoreidx+1+j*2]);
de = dictFind(zs->dict,ele);
if (de != NULL) {
+ if (nx) continue;
curobj = dictGetKey(de);
curscore = *(double*)dictGetVal(de);
@@ -1280,22 +1354,30 @@ void zaddGenericCommand(redisClient *c, int incr) {
server.dirty++;
updated++;
}
- } else {
+ processed++;
+ } else if (!xx) {
znode = zslInsert(zs->zsl,score,ele);
incrRefCount(ele); /* Inserted in skiplist. */
redisAssertWithInfo(c,NULL,dictAdd(zs->dict,ele,&znode->score) == DICT_OK);
incrRefCount(ele); /* Added to dictionary. */
server.dirty++;
added++;
+ processed++;
}
} else {
redisPanic("Unknown sorted set encoding");
}
}
- if (incr) /* ZINCRBY */
- addReplyDouble(c,score);
- else /* ZADD */
- addReplyLongLong(c,added);
+
+reply_to_client:
+ if (incr) { /* ZINCRBY or INCR option. */
+ if (processed)
+ addReplyDouble(c,score);
+ else
+ addReply(c,shared.nullbulk);
+ } else { /* ZADD. */
+ addReplyLongLong(c,ch ? added+updated : added);
+ }
cleanup:
zfree(scores);
@@ -1307,11 +1389,11 @@ cleanup:
}
void zaddCommand(redisClient *c) {
- zaddGenericCommand(c,0);
+ zaddGenericCommand(c,ZADD_NONE);
}
void zincrbyCommand(redisClient *c) {
- zaddGenericCommand(c,1);
+ zaddGenericCommand(c,ZADD_INCR);
}
void zremCommand(redisClient *c) {
@@ -2753,25 +2835,10 @@ void zscoreCommand(redisClient *c) {
if ((zobj = lookupKeyReadOrReply(c,key,shared.nullbulk)) == NULL ||
checkType(c,zobj,REDIS_ZSET)) return;
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
- if (zzlFind(zobj->ptr,c->argv[2],&score) != NULL)
- addReplyDouble(c,score);
- else
- addReply(c,shared.nullbulk);
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
- zset *zs = zobj->ptr;
- dictEntry *de;
-
- c->argv[2] = tryObjectEncoding(c->argv[2]);
- de = dictFind(zs->dict,c->argv[2]);
- if (de != NULL) {
- score = *(double*)dictGetVal(de);
- addReplyDouble(c,score);
- } else {
- addReply(c,shared.nullbulk);
- }
+ if (zsetScore(zobj,c->argv[2],&score) == REDIS_ERR) {
+ addReply(c,shared.nullbulk);
} else {
- redisPanic("Unknown sorted set encoding");
+ addReplyDouble(c,score);
}
}
@@ -2819,7 +2886,7 @@ void zrankGenericCommand(redisClient *c, int reverse) {
dictEntry *de;
double score;
- ele = c->argv[2] = tryObjectEncoding(c->argv[2]);
+ ele = c->argv[2];
de = dictFind(zs->dict,ele);
if (de != NULL) {
score = *(double*)dictGetVal(de);
diff --git a/src/ziplist.h b/src/ziplist.h
index e92b5e783..ae96823f9 100644
--- a/src/ziplist.h
+++ b/src/ziplist.h
@@ -28,6 +28,9 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#ifndef _ZIPLIST_H
+#define _ZIPLIST_H
+
#define ZIPLIST_HEAD 0
#define ZIPLIST_TAIL 1
@@ -49,3 +52,5 @@ size_t ziplistBlobLen(unsigned char *zl);
#ifdef REDIS_TEST
int ziplistTest(int argc, char *argv[]);
#endif
+
+#endif /* _ZIPLIST_H */
diff --git a/tests/cluster/run.tcl b/tests/cluster/run.tcl
index f764cea0a..93603ddc9 100644
--- a/tests/cluster/run.tcl
+++ b/tests/cluster/run.tcl
@@ -17,6 +17,7 @@ proc main {} {
}
run_tests
cleanup
+ end_tests
}
if {[catch main e]} {
diff --git a/tests/cluster/tests/10-manual-failover.tcl b/tests/cluster/tests/10-manual-failover.tcl
new file mode 100644
index 000000000..5441b79f3
--- /dev/null
+++ b/tests/cluster/tests/10-manual-failover.tcl
@@ -0,0 +1,192 @@
+# Check the manual failover
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+ assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+ wait_for_condition 1000 50 {
+ [RI 5 master_link_status] eq {up}
+ } else {
+ fail "Instance #5 master link status is not up"
+ }
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+set numkeys 50000
+set numops 10000
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+catch {unset content}
+array set content {}
+
+test "Send CLUSTER FAILOVER to #5, during load" {
+ for {set j 0} {$j < $numops} {incr j} {
+ # Write random data to random list.
+ set listid [randomInt $numkeys]
+ set key "key:$listid"
+ set ele [randomValue]
+ # We write both with Lua scripts and with plain commands.
+ # This way we are able to stress Lua -> Redis command invocation
+ # as well, that has tests to prevent Lua to write into wrong
+ # hash slots.
+ if {$listid % 2} {
+ $cluster rpush $key $ele
+ } else {
+ $cluster eval {redis.call("rpush",KEYS[1],ARGV[1])} 1 $key $ele
+ }
+ lappend content($key) $ele
+
+ if {($j % 1000) == 0} {
+ puts -nonewline W; flush stdout
+ }
+
+ if {$j == $numops/2} {R 5 cluster failover}
+ }
+}
+
+test "Wait for failover" {
+ wait_for_condition 1000 50 {
+ [CI 1 cluster_current_epoch] > $current_epoch
+ } else {
+ fail "No failover detected"
+ }
+}
+
+test "Cluster should eventually be up again" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 1
+}
+
+test "Instance #5 is now a master" {
+ assert {[RI 5 role] eq {master}}
+}
+
+test "Verify $numkeys keys for consistency with logical content" {
+ # Check that the Redis Cluster content matches our logical content.
+ foreach {key value} [array get content] {
+ assert {[$cluster lrange $key 0 -1] eq $value}
+ }
+}
+
+test "Instance #0 gets converted into a slave" {
+ wait_for_condition 1000 50 {
+ [RI 0 role] eq {slave}
+ } else {
+ fail "Old master was not converted into slave"
+ }
+}
+
+## Check that manual failover does not happen if we can't talk with the master.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+ assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+ wait_for_condition 1000 50 {
+ [RI 5 master_link_status] eq {up}
+ } else {
+ fail "Instance #5 master link status is not up"
+ }
+}
+
+test "Make instance #0 unreachable without killing it" {
+ R 0 deferred 1
+ R 0 DEBUG SLEEP 10
+}
+
+test "Send CLUSTER FAILOVER to instance #5" {
+ R 5 cluster failover
+}
+
+test "Instance #5 is still a slave after some time (no failover)" {
+ after 5000
+ assert {[RI 5 role] eq {master}}
+}
+
+test "Wait for instance #0 to return back alive" {
+ R 0 deferred 0
+ assert {[R 0 read] eq {OK}}
+}
+
+## Check with "force" failover happens anyway.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+ assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+ wait_for_condition 1000 50 {
+ [RI 5 master_link_status] eq {up}
+ } else {
+ fail "Instance #5 master link status is not up"
+ }
+}
+
+test "Make instance #0 unreachable without killing it" {
+ R 0 deferred 1
+ R 0 DEBUG SLEEP 10
+}
+
+test "Send CLUSTER FAILOVER to instance #5" {
+ R 5 cluster failover force
+}
+
+test "Instance #5 is a master after some time" {
+ wait_for_condition 1000 50 {
+ [RI 5 role] eq {master}
+ } else {
+ fail "Instance #5 is not a master after some time regardless of FORCE"
+ }
+}
+
+test "Wait for instance #0 to return back alive" {
+ R 0 deferred 0
+ assert {[R 0 read] eq {OK}}
+}
diff --git a/tests/cluster/tests/11-manual-takeover.tcl b/tests/cluster/tests/11-manual-takeover.tcl
new file mode 100644
index 000000000..f567c6962
--- /dev/null
+++ b/tests/cluster/tests/11-manual-takeover.tcl
@@ -0,0 +1,59 @@
+# Manual takeover test
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 0
+}
+
+test "Killing majority of master nodes" {
+ kill_instance redis 0
+ kill_instance redis 1
+ kill_instance redis 2
+}
+
+test "Cluster should eventually be down" {
+ assert_cluster_state fail
+}
+
+test "Use takeover to bring slaves back" {
+ R 5 cluster failover takeover
+ R 6 cluster failover takeover
+ R 7 cluster failover takeover
+}
+
+test "Cluster should eventually be up again" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 4
+}
+
+test "Instance #5, #6, #7 are now masters" {
+ assert {[RI 5 role] eq {master}}
+ assert {[RI 6 role] eq {master}}
+ assert {[RI 7 role] eq {master}}
+}
+
+test "Restarting the previously killed master nodes" {
+ restart_instance redis 0
+ restart_instance redis 1
+ restart_instance redis 2
+}
+
+test "Instance #0, #1, #2 gets converted into a slaves" {
+ wait_for_condition 1000 50 {
+ [RI 0 role] eq {slave} && [RI 1 role] eq {slave} && [RI 2 role] eq {slave}
+ } else {
+ fail "Old masters not converted into slaves"
+ }
+}
diff --git a/tests/instances.tcl b/tests/instances.tcl
index 353d9b2d2..370d5e3aa 100644
--- a/tests/instances.tcl
+++ b/tests/instances.tcl
@@ -19,6 +19,7 @@ set ::verbose 0
set ::valgrind 0
set ::pause_on_error 0
set ::simulate_error 0
+set ::failed 0
set ::sentinel_instances {}
set ::redis_instances {}
set ::sentinel_base_port 20000
@@ -231,6 +232,7 @@ proc test {descr code} {
flush stdout
if {[catch {set retval [uplevel 1 $code]} error]} {
+ incr ::failed
if {[string match "assertion:*" $error]} {
set msg [string range $error 10 end]
puts [colorstr red $msg]
@@ -246,6 +248,7 @@ proc test {descr code} {
}
}
+# Execute all the units inside the 'tests' directory.
proc run_tests {} {
set tests [lsort [glob ../tests/*]]
foreach test $tests {
@@ -258,6 +261,17 @@ proc run_tests {} {
}
}
+# Print a message and exists with 0 / 1 according to zero or more failures.
+proc end_tests {} {
+ if {$::failed == 0} {
+ puts "GOOD! No errors."
+ exit 0
+ } else {
+ puts "WARNING $::failed tests faield."
+ exit 1
+ }
+}
+
# The "S" command is used to interact with the N-th Sentinel.
# The general form is:
#
diff --git a/tests/integration/replication-4.tcl b/tests/integration/replication-4.tcl
index c2b314567..4f6e8c18c 100644
--- a/tests/integration/replication-4.tcl
+++ b/tests/integration/replication-4.tcl
@@ -126,7 +126,7 @@ start_server {tags {"repl"}} {
test {Replication of SPOP command -- alsoPropagate() API} {
$master del myset
- set size [randomInt 100]
+ set size [expr 1+[randomInt 100]]
set content {}
for {set j 0} {$j < $size} {incr j} {
lappend content [randomValue]
diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl
index d124c6f1f..ece764911 100644
--- a/tests/integration/replication.tcl
+++ b/tests/integration/replication.tcl
@@ -1,10 +1,17 @@
start_server {tags {"repl"}} {
+ set A [srv 0 client]
+ set A_host [srv 0 host]
+ set A_port [srv 0 port]
start_server {} {
- test {First server should have role slave after SLAVEOF} {
- r -1 slaveof [srv 0 host] [srv 0 port]
+ set B [srv 0 client]
+ set B_host [srv 0 host]
+ set B_port [srv 0 port]
+
+ test {Set instance A as slave of B} {
+ $A slaveof $B_host $B_port
wait_for_condition 50 100 {
- [s -1 role] eq {slave} &&
- [string match {*master_link_status:up*} [r -1 info replication]]
+ [lindex [$A role] 0] eq {slave} &&
+ [string match {*master_link_status:up*} [$A info replication]]
} else {
fail "Can't turn the instance into a slave"
}
@@ -15,9 +22,9 @@ start_server {tags {"repl"}} {
$rd brpoplpush a b 5
r lpush a foo
wait_for_condition 50 100 {
- [r debug digest] eq [r -1 debug digest]
+ [$A debug digest] eq [$B debug digest]
} else {
- fail "Master and slave have different digest: [r debug digest] VS [r -1 debug digest]"
+ fail "Master and slave have different digest: [$A debug digest] VS [$B debug digest]"
}
}
@@ -28,7 +35,36 @@ start_server {tags {"repl"}} {
r lpush c 3
$rd brpoplpush c d 5
after 1000
- assert_equal [r debug digest] [r -1 debug digest]
+ assert_equal [$A debug digest] [$B debug digest]
+ }
+
+ test {BLPOP followed by role change, issue #2473} {
+ set rd [redis_deferring_client]
+ $rd blpop foo 0 ; # Block while B is a master
+
+ # Turn B into master of A
+ $A slaveof no one
+ $B slaveof $A_host $A_port
+ wait_for_condition 50 100 {
+ [lindex [$B role] 0] eq {slave} &&
+ [string match {*master_link_status:up*} [$B info replication]]
+ } else {
+ fail "Can't turn the instance into a slave"
+ }
+
+ # Push elements into the "foo" list of the new slave.
+ # If the client is still attached to the instance, we'll get
+ # a desync between the two instances.
+ $A rpush foo a b c
+ after 100
+
+ wait_for_condition 50 100 {
+ [$A debug digest] eq [$B debug digest] &&
+ [$A lrange foo 0 -1] eq {a b c} &&
+ [$B lrange foo 0 -1] eq {a b c}
+ } else {
+ fail "Master and slave have different digest: [$A debug digest] VS [$B debug digest]"
+ }
}
}
}
@@ -115,7 +151,7 @@ foreach mdl {no yes} {
start_server {} {
lappend slaves [srv 0 client]
test "Connect multiple slaves at the same time (issue #141), master diskless=$mdl, slave diskless=$sdl" {
- # Send SALVEOF commands to slaves
+ # Send SLAVEOF commands to slaves
[lindex $slaves 0] config set repl-diskless-sync $sdl
[lindex $slaves 1] config set repl-diskless-sync $sdl
[lindex $slaves 2] config set repl-diskless-sync $sdl
diff --git a/tests/sentinel/run.tcl b/tests/sentinel/run.tcl
index f33029959..9a2fcfb49 100644
--- a/tests/sentinel/run.tcl
+++ b/tests/sentinel/run.tcl
@@ -13,6 +13,7 @@ proc main {} {
spawn_instance redis $::redis_base_port $::instances_count
run_tests
cleanup
+ end_tests
}
if {[catch main e]} {
diff --git a/tests/sentinel/tests/06-ckquorum.tcl b/tests/sentinel/tests/06-ckquorum.tcl
new file mode 100644
index 000000000..31e5fa2f8
--- /dev/null
+++ b/tests/sentinel/tests/06-ckquorum.tcl
@@ -0,0 +1,34 @@
+# Test for the SENTINEL CKQUORUM command
+
+source "../tests/includes/init-tests.tcl"
+set num_sentinels [llength $::sentinel_instances]
+
+test "CKQUORUM reports OK and the right amount of Sentinels" {
+ foreach_sentinel_id id {
+ assert_match "*OK $num_sentinels usable*" [S $id SENTINEL CKQUORUM mymaster]
+ }
+}
+
+test "CKQUORUM detects quorum cannot be reached" {
+ set orig_quorum [expr {$num_sentinels/2+1}]
+ S 0 SENTINEL SET mymaster quorum [expr {$num_sentinels+1}]
+ catch {[S 0 SENTINEL CKQUORUM mymaster]} err
+ assert_match "*NOQUORUM*" $err
+ S 0 SENTINEL SET mymaster quorum $orig_quorum
+}
+
+test "CKQUORUM detects failover authorization cannot be reached" {
+ set orig_quorum [expr {$num_sentinels/2+1}]
+ S 0 SENTINEL SET mymaster quorum 1
+ kill_instance sentinel 1
+ kill_instance sentinel 2
+ kill_instance sentinel 3
+ after 5000
+ catch {[S 0 SENTINEL CKQUORUM mymaster]} err
+ assert_match "*NOQUORUM*" $err
+ S 0 SENTINEL SET mymaster quorum $orig_quorum
+ restart_instance sentinel 1
+ restart_instance sentinel 2
+ restart_instance sentinel 3
+}
+
diff --git a/tests/support/server.tcl b/tests/support/server.tcl
index 317b40a84..e5c31af28 100644
--- a/tests/support/server.tcl
+++ b/tests/support/server.tcl
@@ -54,10 +54,15 @@ proc kill_server config {
# kill server and wait for the process to be totally exited
catch {exec kill $pid}
+ if {$::valgrind} {
+ set max_wait 60000
+ } else {
+ set max_wait 10000
+ }
while {[is_alive $config]} {
incr wait 10
- if {$wait >= 5000} {
+ if {$wait >= $max_wait} {
puts "Forcing process $pid to exit..."
catch {exec kill -KILL $pid}
} elseif {$wait % 1000 == 0} {
diff --git a/tests/unit/geo.tcl b/tests/unit/geo.tcl
new file mode 100644
index 000000000..1c6b90b63
--- /dev/null
+++ b/tests/unit/geo.tcl
@@ -0,0 +1,158 @@
+# Helper functins to simulate search-in-radius in the Tcl side in order to
+# verify the Redis implementation with a fuzzy test.
+proc geo_degrad deg {expr {$deg*atan(1)*8/360}}
+
+proc geo_distance {lon1d lat1d lon2d lat2d} {
+ set lon1r [geo_degrad $lon1d]
+ set lat1r [geo_degrad $lat1d]
+ set lon2r [geo_degrad $lon2d]
+ set lat2r [geo_degrad $lat2d]
+ set v [expr {sin(($lon2r - $lon1r) / 2)}]
+ set u [expr {sin(($lat2r - $lat1r) / 2)}]
+ expr {2.0 * 6372797.560856 * \
+ asin(sqrt($u * $u + cos($lat1r) * cos($lat2r) * $v * $v))}
+}
+
+proc geo_random_point {lonvar latvar} {
+ upvar 1 $lonvar lon
+ upvar 1 $latvar lat
+ # Note that the actual latitude limit should be -85 to +85, we restrict
+ # the test to -70 to +70 since in this range the algorithm is more precise
+ # while outside this range occasionally some element may be missing.
+ set lon [expr {-180 + rand()*360}]
+ set lat [expr {-70 + rand()*140}]
+}
+
+start_server {tags {"geo"}} {
+ test {GEOADD create} {
+ r geoadd nyc -73.9454966 40.747533 "lic market"
+ } {1}
+
+ test {GEOADD update} {
+ r geoadd nyc -73.9454966 40.747533 "lic market"
+ } {0}
+
+ test {GEOADD invalid coordinates} {
+ catch {
+ r geoadd nyc -73.9454966 40.747533 "lic market" \
+ foo bar "luck market"
+ } err
+ set err
+ } {*valid*}
+
+ test {GEOADD multi add} {
+ r geoadd nyc -73.9733487 40.7648057 "central park n/q/r" -73.9903085 40.7362513 "union square" -74.0131604 40.7126674 "wtc one" -73.7858139 40.6428986 "jfk" -73.9375699 40.7498929 "q4" -73.9564142 40.7480973 4545
+ } {6}
+
+ test {Check geoset values} {
+ r zrange nyc 0 -1 withscores
+ } {{wtc one} 1791873972053020 {union square} 1791875485187452 {central park n/q/r} 1791875761332224 4545 1791875796750882 {lic market} 1791875804419201 q4 1791875830079666 jfk 1791895905559723}
+
+ test {GEORADIUS simple (sorted)} {
+ r georadius nyc -73.9798091 40.7598464 3 km asc
+ } {{central park n/q/r} 4545 {union square}}
+
+ test {GEORADIUS withdist (sorted)} {
+ r georadius nyc -73.9798091 40.7598464 3 km withdist asc
+ } {{{central park n/q/r} 0.7750} {4545 2.3651} {{union square} 2.7697}}
+
+ test {GEORADIUS with COUNT} {
+ r georadius nyc -73.9798091 40.7598464 10 km COUNT 3
+ } {{central park n/q/r} 4545 {union square}}
+
+ test {GEORADIUS with COUNT DESC} {
+ r georadius nyc -73.9798091 40.7598464 10 km COUNT 2 DESC
+ } {{wtc one} q4}
+
+ test {GEORADIUSBYMEMBER simple (sorted)} {
+ r georadiusbymember nyc "wtc one" 7 km
+ } {{wtc one} {union square} {central park n/q/r} 4545 {lic market}}
+
+ test {GEORADIUSBYMEMBER withdist (sorted)} {
+ r georadiusbymember nyc "wtc one" 7 km withdist
+ } {{{wtc one} 0.0000} {{union square} 3.2544} {{central park n/q/r} 6.7000} {4545 6.1975} {{lic market} 6.8969}}
+
+ test {GEOHASH is able to return geohash strings} {
+ # Example from Wikipedia.
+ r del points
+ r geoadd points -5.6 42.6 test
+ lindex [r geohash points test] 0
+ } {ezs42e44yx0}
+
+ test {GEOPOS simple} {
+ r del points
+ r geoadd points 10 20 a 30 40 b
+ lassign [lindex [r geopos points a b] 0] x1 y1
+ lassign [lindex [r geopos points a b] 1] x2 y2
+ assert {abs($x1 - 10) < 0.001}
+ assert {abs($y1 - 20) < 0.001}
+ assert {abs($x2 - 30) < 0.001}
+ assert {abs($y2 - 40) < 0.001}
+ }
+
+ test {GEOPOS missing element} {
+ r del points
+ r geoadd points 10 20 a 30 40 b
+ lindex [r geopos points a x b] 1
+ } {}
+
+ test {GEODIST simple & unit} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ set m [r geodist points Palermo Catania]
+ assert {$m > 166274 && $m < 166275}
+ set km [r geodist points Palermo Catania km]
+ assert {$km > 166.2 && $km < 166.3}
+ }
+
+ test {GEODIST missing elements} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ set m [r geodist points Palermo Agrigento]
+ assert {$m eq {}}
+ set m [r geodist points Ragusa Agrigento]
+ assert {$m eq {}}
+ set m [r geodist empty_key Palermo Catania]
+ assert {$m eq {}}
+ }
+
+ test {GEOADD + GEORANGE randomized test} {
+ set attempt 10
+ while {[incr attempt -1]} {
+ unset -nocomplain debuginfo
+ set srand_seed [randomInt 1000000]
+ lappend debuginfo "srand_seed is $srand_seed"
+ expr {srand($srand_seed)} ; # If you need a reproducible run
+ r del mypoints
+ set radius_km [expr {[randomInt 200]+10}]
+ set radius_m [expr {$radius_km*1000}]
+ geo_random_point search_lon search_lat
+ lappend debuginfo "Search area: $search_lon,$search_lat $radius_km km"
+ set tcl_result {}
+ set argv {}
+ for {set j 0} {$j < 20000} {incr j} {
+ geo_random_point lon lat
+ lappend argv $lon $lat "place:$j"
+ if {[geo_distance $lon $lat $search_lon $search_lat] < $radius_m} {
+ lappend tcl_result "place:$j"
+ lappend debuginfo "place:$j $lon $lat [expr {[geo_distance $lon $lat $search_lon $search_lat]/1000}] km"
+ }
+ }
+ r geoadd mypoints {*}$argv
+ set res [lsort [r georadius mypoints $search_lon $search_lat $radius_km km]]
+ set res2 [lsort $tcl_result]
+ set test_result OK
+ if {$res != $res2} {
+ puts "Redis: $res"
+ puts "Tcl : $res2"
+ puts [join $debuginfo "\n"]
+ set test_result FAIL
+ }
+ unset -nocomplain debuginfo
+ if {$test_result ne {OK}} break
+ }
+ set test_result
+ } {OK}
+}
diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl
index a9a3d0835..7b467f1c4 100644
--- a/tests/unit/type/set.tcl
+++ b/tests/unit/type/set.tcl
@@ -519,6 +519,7 @@ start_server {
test "SMOVE non existing key" {
setup_move
assert_equal 0 [r smove myset1 myset2 foo]
+ assert_equal 0 [r smove myset1 myset1 foo]
assert_equal {1 a b} [lsort [r smembers myset1]]
assert_equal {2 3 4} [lsort [r smembers myset2]]
}
diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl
index 238eebb9d..cc5601439 100644
--- a/tests/unit/type/zset.tcl
+++ b/tests/unit/type/zset.tcl
@@ -43,6 +43,84 @@ start_server {tags {"zset"}} {
assert_error "*not*float*" {r zadd myzset nan abc}
}
+ test "ZADD with options syntax error with incomplete pair" {
+ r del ztmp
+ catch {r zadd ztmp xx 10 x 20} err
+ set err
+ } {ERR*}
+
+ test "ZADD XX option without key - $encoding" {
+ r del ztmp
+ assert {[r zadd ztmp xx 10 x] == 0}
+ assert {[r type ztmp] eq {none}}
+ }
+
+ test "ZADD XX existing key - $encoding" {
+ r del ztmp
+ r zadd ztmp 10 x
+ assert {[r zadd ztmp xx 20 y] == 0}
+ assert {[r zcard ztmp] == 1}
+ }
+
+ test "ZADD XX returns the number of elements actually added" {
+ r del ztmp
+ r zadd ztmp 10 x
+ set retval [r zadd ztmp 10 x 20 y 30 z]
+ assert {$retval == 2}
+ }
+
+ test "ZADD XX updates existing elements score" {
+ r del ztmp
+ r zadd ztmp 10 x 20 y 30 z
+ r zadd ztmp xx 5 foo 11 x 21 y 40 zap
+ assert {[r zcard ztmp] == 3}
+ assert {[r zscore ztmp x] == 11}
+ assert {[r zscore ztmp y] == 21}
+ }
+
+ test "ZADD XX and NX are not compatible" {
+ r del ztmp
+ catch {r zadd ztmp xx nx 10 x} err
+ set err
+ } {ERR*}
+
+ test "ZADD NX with non exisitng key" {
+ r del ztmp
+ r zadd ztmp nx 10 x 20 y 30 z
+ assert {[r zcard ztmp] == 3}
+ }
+
+ test "ZADD NX only add new elements without updating old ones" {
+ r del ztmp
+ r zadd ztmp 10 x 20 y 30 z
+ assert {[r zadd ztmp nx 11 x 21 y 100 a 200 b] == 2}
+ assert {[r zscore ztmp x] == 10}
+ assert {[r zscore ztmp y] == 20}
+ assert {[r zscore ztmp a] == 100}
+ assert {[r zscore ztmp b] == 200}
+ }
+
+ test "ZADD INCR works like ZINCRBY" {
+ r del ztmp
+ r zadd ztmp 10 x 20 y 30 z
+ r zadd ztmp INCR 15 x
+ assert {[r zscore ztmp x] == 25}
+ }
+
+ test "ZADD INCR works with a single score-elemenet pair" {
+ r del ztmp
+ r zadd ztmp 10 x 20 y 30 z
+ catch {r zadd ztmp INCR 15 x 10 y} err
+ set err
+ } {ERR*}
+
+ test "ZADD CH option changes return value to all changed elements" {
+ r del ztmp
+ r zadd ztmp 10 x 20 y 30 z
+ assert {[r zadd ztmp 11 x 21 y 30 z] == 0}
+ assert {[r zadd ztmp ch 12 x 22 y 30 z] == 2}
+ }
+
test "ZINCRBY calls leading to NaN result in error" {
r zincrby myzset +inf abc
assert_error "*NaN*" {r zincrby myzset -inf abc}
@@ -77,6 +155,8 @@ start_server {tags {"zset"}} {
}
test "ZCARD basics - $encoding" {
+ r del ztmp
+ r zadd ztmp 10 a 20 b 30 c
assert_equal 3 [r zcard ztmp]
assert_equal 0 [r zcard zdoesntexist]
}
diff --git a/utils/create-cluster/.gitignore b/utils/create-cluster/.gitignore
index cdd7c19c8..2988ee919 100644
--- a/utils/create-cluster/.gitignore
+++ b/utils/create-cluster/.gitignore
@@ -1 +1,5 @@
config.sh
+*.rdb
+*.aof
+*.conf
+*.log
diff --git a/utils/create-cluster/create-cluster b/utils/create-cluster/create-cluster
index efb3135d4..98941496f 100755
--- a/utils/create-cluster/create-cluster
+++ b/utils/create-cluster/create-cluster
@@ -43,7 +43,7 @@ then
while [ $((PORT < ENDPORT)) != "0" ]; do
PORT=$((PORT+1))
echo "Stopping $PORT"
- redis-cli -p $PORT shutdown nosave
+ ../../src/redis-cli -p $PORT shutdown nosave
done
exit 0
fi
@@ -54,7 +54,7 @@ then
while [ 1 ]; do
clear
date
- redis-cli -p $PORT cluster nodes | head -30
+ ../../src/redis-cli -p $PORT cluster nodes | head -30
sleep 1
done
exit 0
diff --git a/utils/releasetools/00_test_release.sh b/utils/releasetools/03_test_release.sh
index 8fb6e9501..8fb6e9501 100755
--- a/utils/releasetools/00_test_release.sh
+++ b/utils/releasetools/03_test_release.sh
diff --git a/utils/releasetools/03_release_hash.sh b/utils/releasetools/04_release_hash.sh
index df082149a..df082149a 100755
--- a/utils/releasetools/03_release_hash.sh
+++ b/utils/releasetools/04_release_hash.sh