From 0feb2aabcad2943b9cec850f2495a7d7f91a998b Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 17 Apr 2014 17:29:04 +0200 Subject: PFCOUNT support for multi-key union. --- src/hyperloglog.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++---- src/redis.c | 2 +- 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 1b984c15f..a44df87c9 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -198,8 +198,9 @@ struct hllhdr { #define HLL_REGISTER_MAX ((1<registers will point to an uint8_t array of HLL_REGISTERS element. + * This is useful in order to speedup PFCOUNT when called against multiple + * keys (no need to work with 6-bit integers encoding). */ uint64_t hllCount(struct hllhdr *hdr, int *invalid) { double m = HLL_REGISTERS; double E, alpha = 0.7213/(1+1.079/m); @@ -947,9 +973,13 @@ uint64_t hllCount(struct hllhdr *hdr, int *invalid) { /* Compute SUM(2^-register[0..i]). */ if (hdr->encoding == HLL_DENSE) { E = hllDenseSum(hdr->registers,PE,&ez); - } else { + } else if (hdr->encoding == HLL_SPARSE) { E = hllSparseSum(hdr->registers, sdslen((sds)hdr)-HLL_HDR_SIZE,PE,&ez,invalid); + } else if (hdr->encoding == HLL_RAW) { + E = hllRawSum(hdr->registers,PE,&ez); + } else { + redisPanic("Unknown HyperLogLog encoding in hllCount()"); } /* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */ @@ -1151,10 +1181,47 @@ void pfaddCommand(redisClient *c) { /* PFCOUNT var -> approximated cardinality of set. */ void pfcountCommand(redisClient *c) { - robj *o = lookupKeyRead(c->db,c->argv[1]); + robj *o; struct hllhdr *hdr; uint64_t card; + /* Case 1: multi-key keys, cardinality of the union. + * + * When multiple keys are specified, PFCOUNT actually computes + * the cardinality of the merge of the N HLLs specified. */ + if (c->argc > 2) { + uint8_t max[HLL_HDR_SIZE+HLL_REGISTERS], *registers; + int j; + + /* Compute an HLL with M[i] = MAX(M[i]_j). */ + memset(max,0,sizeof(max)); + hdr = (struct hllhdr*) max; + hdr->encoding = HLL_RAW; /* Special internal-only encoding. */ + registers = max + HLL_HDR_SIZE; + for (j = 1; j < c->argc; j++) { + /* Check type and size. */ + robj *o = lookupKeyRead(c->db,c->argv[j]); + if (o == NULL) continue; /* Assume empty HLL for non existing var. */ + if (isHLLObjectOrReply(c,o) != REDIS_OK) return; + + /* Merge with this HLL with our 'max' HHL by setting max[i] + * to MAX(max[i],hll[i]). */ + if (hllMerge(registers,o) == REDIS_ERR) { + addReplySds(c,sdsnew(invalid_hll_err)); + return; + } + } + + /* Compute cardinality of the resulting set. */ + addReplyLongLong(c,hllCount(hdr,NULL)); + return; + } + + /* Case 2: cardinality of the single HLL. + * + * The user specified a single key. Either return the cached value + * or compute one and update the cache. */ + o = lookupKeyRead(c->db,c->argv[1]); if (o == NULL) { /* No key? Cardinality is zero since no element was added, otherwise * we would have a key as HLLADD creates it as a side effect. */ diff --git a/src/redis.c b/src/redis.c index d7b9c72dc..88743550f 100644 --- a/src/redis.c +++ b/src/redis.c @@ -274,7 +274,7 @@ struct redisCommand redisCommandTable[] = { {"wait",waitCommand,3,"rs",0,NULL,0,0,0,0,0}, {"pfselftest",pfselftestCommand,1,"r",0,NULL,0,0,0,0,0}, {"pfadd",pfaddCommand,-2,"wm",0,NULL,1,1,1,0,0}, - {"pfcount",pfcountCommand,2,"w",0,NULL,1,1,1,0,0}, + {"pfcount",pfcountCommand,-2,"w",0,NULL,1,1,1,0,0}, {"pfmerge",pfmergeCommand,-2,"wm",0,NULL,1,-1,1,0,0}, {"pfdebug",pfdebugCommand,-3,"w",0,NULL,0,0,0,0,0} }; -- cgit v1.2.1