summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2014-04-15 17:46:51 +0200
committerantirez <antirez@gmail.com>2014-04-16 15:26:28 +0200
commit614fcd491e50a0f95c08b72dc292c9cc53ba8a5a (patch)
tree3c0738c3445aab9fe14a21138d9669a4c18fd810
parent08da2b79fcb72ab7decadf21201a45cf47c71602 (diff)
downloadredis-614fcd491e50a0f95c08b72dc292c9cc53ba8a5a.tar.gz
User-defined switch point between sparse-dense HLL encodings.
-rw-r--r--redis.conf14
-rw-r--r--src/config.c8
-rw-r--r--src/hyperloglog.c11
-rw-r--r--src/redis.c1
-rw-r--r--src/redis.h4
5 files changed, 32 insertions, 6 deletions
diff --git a/redis.conf b/redis.conf
index d83acbba0..fa87fc9f7 100644
--- a/redis.conf
+++ b/redis.conf
@@ -635,6 +635,20 @@ set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
+# HyperLogLog sparse representation bytes limit. The limit includes the
+# 16-byte header. When a HyperLogLog using the sparse representation crosses
+# this limit, it is converted into the dense representation.
+#
+# A value greater than 16000 is totally useless, since at that point the
+# dense representation is more memory efficient.
+#
+# The suggested value is ~ 3000 in order to have the benefits of
+# the space efficient encoding without slowing down too much PFADD,
+# which is O(N) with the sparse encoding. The value can be raised to
+# ~ 10000 when CPU is not a concern, but space is, and the data set is
+# composed of many HyperLogLogs with cardinality in the 0 - 15000 range.
+hll-sparse-max-bytes 3000
+
# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in
# order to help rehashing the main Redis hash table (the one mapping top-level
# keys to values). The hash table implementation Redis uses (see dict.c)
diff --git a/src/config.c b/src/config.c
index 964772dd8..ece4a95f4 100644
--- a/src/config.c
+++ b/src/config.c
@@ -388,6 +388,8 @@ void loadServerConfigFromString(char *config) {
server.zset_max_ziplist_entries = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"zset-max-ziplist-value") && argc == 2) {
server.zset_max_ziplist_value = memtoll(argv[1], NULL);
+ } else if (!strcasecmp(argv[0],"hll-sparse-max-bytes") && argc == 2) {
+ server.hll_sparse_max_bytes = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"rename-command") && argc == 3) {
struct redisCommand *cmd = lookupCommand(argv[1]);
int retval;
@@ -733,6 +735,9 @@ void configSetCommand(redisClient *c) {
} else if (!strcasecmp(c->argv[2]->ptr,"zset-max-ziplist-value")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
server.zset_max_ziplist_value = ll;
+ } else if (!strcasecmp(c->argv[2]->ptr,"hll-sparse-max-bytes")) {
+ if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
+ server.hll_sparse_max_bytes = ll;
} else if (!strcasecmp(c->argv[2]->ptr,"lua-time-limit")) {
if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
server.lua_time_limit = ll;
@@ -934,6 +939,8 @@ void configGetCommand(redisClient *c) {
server.zset_max_ziplist_entries);
config_get_numerical_field("zset-max-ziplist-value",
server.zset_max_ziplist_value);
+ config_get_numerical_field("hll-sparse-max-bytes",
+ server.hll_sparse_max_bytes);
config_get_numerical_field("lua-time-limit",server.lua_time_limit);
config_get_numerical_field("slowlog-log-slower-than",
server.slowlog_log_slower_than);
@@ -1726,6 +1733,7 @@ int rewriteConfig(char *path) {
rewriteConfigNumericalOption(state,"set-max-intset-entries",server.set_max_intset_entries,REDIS_SET_MAX_INTSET_ENTRIES);
rewriteConfigNumericalOption(state,"zset-max-ziplist-entries",server.zset_max_ziplist_entries,REDIS_ZSET_MAX_ZIPLIST_ENTRIES);
rewriteConfigNumericalOption(state,"zset-max-ziplist-value",server.zset_max_ziplist_value,REDIS_ZSET_MAX_ZIPLIST_VALUE);
+ rewriteConfigNumericalOption(state,"hll-sparse-max-bytes",server.hll_sparse_max_bytes,REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES);
rewriteConfigYesNoOption(state,"activerehashing",server.activerehashing,REDIS_DEFAULT_ACTIVE_REHASHING);
rewriteConfigClientoutputbufferlimitOption(state);
rewriteConfigNumericalOption(state,"hz",server.hz,REDIS_DEFAULT_HZ);
diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index b1b330310..bb07aa380 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -176,7 +176,7 @@
* involved in updating the sparse representation is not justified by the
* memory savings. The exact maximum length of the sparse representation
* when this implementation switches to the dense representation is
- * configured via the define HLL_SPARSE_MAX.
+ * configured at runtime via server.hll_sparse_max_bytes.
*/
struct hllhdr {
@@ -202,8 +202,6 @@ struct hllhdr {
#define HLL_SPARSE 1 /* Sparse encoding */
#define HLL_MAX_ENCODING 1
-#define HLL_SPARSE_MAX 3000
-
static char *invalid_hll_err = "Corrupted HLL object detected";
/* =========================== Low level bit macros ========================= */
@@ -634,7 +632,7 @@ int hllSparseToDense(robj *o) {
* As a side effect the function may promote the HLL representation from
* sparse to dense: this happens when a register requires to be set to a value
* not representable with the sparse representation, or when the resulting
- * size would be greater than HLL_SPARSE_MAX. */
+ * size would be greater than server.hll_sparse_max_bytes. */
int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) {
struct hllhdr *hdr;
uint8_t oldcount, count, *sparse, *end, *p, *prev, *next;
@@ -815,7 +813,8 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) {
int oldlen = is_xzero ? 2 : 1;
int deltalen = seqlen-oldlen;
- if (deltalen > 0 && sdslen(o->ptr)+deltalen > HLL_SPARSE_MAX) goto promote;
+ if (deltalen > 0 &&
+ sdslen(o->ptr)+deltalen > server.hll_sparse_max_bytes) goto promote;
if (deltalen && next) memmove(next+deltalen,next,end-next);
sdsIncrLen(o->ptr,deltalen);
memcpy(p,seq,seqlen);
@@ -1312,7 +1311,7 @@ void pfselftestCommand(redisClient *c) {
/* Make sure that for small cardinalities we use sparse
* encoding. */
- if (j == checkpoint && j < HLL_SPARSE_MAX/2) {
+ if (j == checkpoint && j < server.hll_sparse_max_bytes/2) {
hdr2 = o->ptr;
if (hdr2->encoding != HLL_SPARSE) {
addReplyError(c, "TESTFAILED sparse encoding not used");
diff --git a/src/redis.c b/src/redis.c
index 8a74350b4..cd0410f16 100644
--- a/src/redis.c
+++ b/src/redis.c
@@ -1353,6 +1353,7 @@ void initServerConfig() {
server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES;
server.zset_max_ziplist_entries = REDIS_ZSET_MAX_ZIPLIST_ENTRIES;
server.zset_max_ziplist_value = REDIS_ZSET_MAX_ZIPLIST_VALUE;
+ server.hll_sparse_max_bytes = REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES;
server.shutdown_asap = 0;
server.repl_ping_slave_period = REDIS_REPL_PING_SLAVE_PERIOD;
server.repl_timeout = REDIS_REPL_TIMEOUT;
diff --git a/src/redis.h b/src/redis.h
index 0c40a7f4f..bb1ad2ab6 100644
--- a/src/redis.h
+++ b/src/redis.h
@@ -303,6 +303,9 @@
#define REDIS_ZSET_MAX_ZIPLIST_ENTRIES 128
#define REDIS_ZSET_MAX_ZIPLIST_VALUE 64
+/* HyperLogLog defines */
+#define REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES 3000
+
/* Sets operations codes */
#define REDIS_OP_UNION 0
#define REDIS_OP_DIFF 1
@@ -755,6 +758,7 @@ struct redisServer {
size_t set_max_intset_entries;
size_t zset_max_ziplist_entries;
size_t zset_max_ziplist_value;
+ size_t hll_sparse_max_bytes;
time_t unixtime; /* Unix time sampled every cron cycle. */
long long mstime; /* Like 'unixtime' but with milliseconds resolution. */
/* Pubsub */