From 614fcd491e50a0f95c08b72dc292c9cc53ba8a5a Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 15 Apr 2014 17:46:51 +0200 Subject: User-defined switch point between sparse-dense HLL encodings. --- redis.conf | 14 ++++++++++++++ src/config.c | 8 ++++++++ src/hyperloglog.c | 11 +++++------ src/redis.c | 1 + src/redis.h | 4 ++++ 5 files changed, 32 insertions(+), 6 deletions(-) diff --git a/redis.conf b/redis.conf index d83acbba0..fa87fc9f7 100644 --- a/redis.conf +++ b/redis.conf @@ -635,6 +635,20 @@ set-max-intset-entries 512 zset-max-ziplist-entries 128 zset-max-ziplist-value 64 +# HyperLogLog sparse representation bytes limit. The limit includes the +# 16 bytes header. When an HyperLogLog using the sparse representation crosses +# this limit, it is convereted into the dense representation. +# +# A value greater than 16000 is totally useless, since at that point the +# dense representation is more memory efficient. +# +# The suggested value is ~ 3000 in order to have the benefits of +# the space efficient encoding without slowing down too much PFADD, +# which is O(N) with the sparse encoding. Thev value can be raised to +# ~ 10000 when CPU is not a concern, but space is, and the data set is +# composed of many HyperLogLogs with cardinality in the 0 - 15000 range. +hll-sparse-max-bytes 3000 + # Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in # order to help rehashing the main Redis hash table (the one mapping top-level # keys to values). The hash table implementation Redis uses (see dict.c) diff --git a/src/config.c b/src/config.c index 964772dd8..ece4a95f4 100644 --- a/src/config.c +++ b/src/config.c @@ -388,6 +388,8 @@ void loadServerConfigFromString(char *config) { server.zset_max_ziplist_entries = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"zset-max-ziplist-value") && argc == 2) { server.zset_max_ziplist_value = memtoll(argv[1], NULL); + } else if (!strcasecmp(argv[0],"hll-sparse-max-bytes") && argc == 2) { + server.hll_sparse_max_bytes = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"rename-command") && argc == 3) { struct redisCommand *cmd = lookupCommand(argv[1]); int retval; @@ -733,6 +735,9 @@ void configSetCommand(redisClient *c) { } else if (!strcasecmp(c->argv[2]->ptr,"zset-max-ziplist-value")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt; server.zset_max_ziplist_value = ll; + } else if (!strcasecmp(c->argv[2]->ptr,"hll-sparse-max-bytes")) { + if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt; + server.hll_sparse_max_bytes = ll; } else if (!strcasecmp(c->argv[2]->ptr,"lua-time-limit")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt; server.lua_time_limit = ll; @@ -934,6 +939,8 @@ void configGetCommand(redisClient *c) { server.zset_max_ziplist_entries); config_get_numerical_field("zset-max-ziplist-value", server.zset_max_ziplist_value); + config_get_numerical_field("hll-sparse-max-bytes", + server.hll_sparse_max_bytes); config_get_numerical_field("lua-time-limit",server.lua_time_limit); config_get_numerical_field("slowlog-log-slower-than", server.slowlog_log_slower_than); @@ -1726,6 +1733,7 @@ int rewriteConfig(char *path) { rewriteConfigNumericalOption(state,"set-max-intset-entries",server.set_max_intset_entries,REDIS_SET_MAX_INTSET_ENTRIES); rewriteConfigNumericalOption(state,"zset-max-ziplist-entries",server.zset_max_ziplist_entries,REDIS_ZSET_MAX_ZIPLIST_ENTRIES); rewriteConfigNumericalOption(state,"zset-max-ziplist-value",server.zset_max_ziplist_value,REDIS_ZSET_MAX_ZIPLIST_VALUE); + rewriteConfigNumericalOption(state,"hll-sparse-max-bytes",server.hll_sparse_max_bytes,REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES); rewriteConfigYesNoOption(state,"activerehashing",server.activerehashing,REDIS_DEFAULT_ACTIVE_REHASHING); rewriteConfigClientoutputbufferlimitOption(state); rewriteConfigNumericalOption(state,"hz",server.hz,REDIS_DEFAULT_HZ); diff --git a/src/hyperloglog.c b/src/hyperloglog.c index b1b330310..bb07aa380 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -176,7 +176,7 @@ * involved in updating the sparse representation is not justified by the * memory savings. The exact maximum length of the sparse representation * when this implementation switches to the dense representation is - * configured via the define HLL_SPARSE_MAX. + * configured via the define server.hll_sparse_max_bytes. */ struct hllhdr { @@ -202,8 +202,6 @@ struct hllhdr { #define HLL_SPARSE 1 /* Sparse encoding */ #define HLL_MAX_ENCODING 1 -#define HLL_SPARSE_MAX 3000 - static char *invalid_hll_err = "Corrupted HLL object detected"; /* =========================== Low level bit macros ========================= */ @@ -634,7 +632,7 @@ int hllSparseToDense(robj *o) { * As a side effect the function may promote the HLL representation from * sparse to dense: this happens when a register requires to be set to a value * not representable with the sparse representation, or when the resulting - * size would be greater than HLL_SPARSE_MAX. */ + * size would be greater than server.hll_sparse_max_bytes. */ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { struct hllhdr *hdr; uint8_t oldcount, count, *sparse, *end, *p, *prev, *next; @@ -815,7 +813,8 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { int oldlen = is_xzero ? 2 : 1; int deltalen = seqlen-oldlen; - if (deltalen > 0 && sdslen(o->ptr)+deltalen > HLL_SPARSE_MAX) goto promote; + if (deltalen > 0 && + sdslen(o->ptr)+deltalen > server.hll_sparse_max_bytes) goto promote; if (deltalen && next) memmove(next+deltalen,next,end-next); sdsIncrLen(o->ptr,deltalen); memcpy(p,seq,seqlen); @@ -1312,7 +1311,7 @@ void pfselftestCommand(redisClient *c) { /* Make sure that for small cardinalities we use sparse * encoding. */ - if (j == checkpoint && j < HLL_SPARSE_MAX/2) { + if (j == checkpoint && j < server.hll_sparse_max_bytes/2) { hdr2 = o->ptr; if (hdr2->encoding != HLL_SPARSE) { addReplyError(c, "TESTFAILED sparse encoding not used"); diff --git a/src/redis.c b/src/redis.c index 8a74350b4..cd0410f16 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1353,6 +1353,7 @@ void initServerConfig() { server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES; server.zset_max_ziplist_entries = REDIS_ZSET_MAX_ZIPLIST_ENTRIES; server.zset_max_ziplist_value = REDIS_ZSET_MAX_ZIPLIST_VALUE; + server.hll_sparse_max_bytes = REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES; server.shutdown_asap = 0; server.repl_ping_slave_period = REDIS_REPL_PING_SLAVE_PERIOD; server.repl_timeout = REDIS_REPL_TIMEOUT; diff --git a/src/redis.h b/src/redis.h index 0c40a7f4f..bb1ad2ab6 100644 --- a/src/redis.h +++ b/src/redis.h @@ -303,6 +303,9 @@ #define REDIS_ZSET_MAX_ZIPLIST_ENTRIES 128 #define REDIS_ZSET_MAX_ZIPLIST_VALUE 64 +/* HyperLogLog defines */ +#define REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES 3000 + /* Sets operations codes */ #define REDIS_OP_UNION 0 #define REDIS_OP_DIFF 1 @@ -755,6 +758,7 @@ struct redisServer { size_t set_max_intset_entries; size_t zset_max_ziplist_entries; size_t zset_max_ziplist_value; + size_t hll_sparse_max_bytes; time_t unixtime; /* Unix time sampled every cron cycle. */ long long mstime; /* Like 'unixtime' but with milliseconds resolution. */ /* Pubsub */ -- cgit v1.2.1