summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2014-04-11 09:47:52 +0200
committerantirez <antirez@gmail.com>2014-04-16 15:09:46 +0200
commit05b5032302de10f3b1164004030698127e7abb60 (patch)
treefe8b07d250d3f006459eb9be74b7e0bed5f57fd3
parent4407ca6fc8e26d2167482b6dac116761d4de0ec8 (diff)
downloadredis-05b5032302de10f3b1164004030698127e7abb60.tar.gz
hllAdd() refactored into two functions.
Also dense representation access macro renamed accordingly.
-rw-r--r--src/hyperloglog.c59
1 files changed, 35 insertions, 24 deletions
diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index 98ea4f006..32d2bbd91 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -331,7 +331,7 @@ struct hllhdr {
/* Store the value of the register at position 'regnum' into variable 'target'.
* 'p' is an array of unsigned bytes. */
-#define HLL_GET_REGISTER(target,p,regnum) do { \
+#define HLL_DENSE_GET_REGISTER(target,p,regnum) do { \
uint8_t *_p = (uint8_t*) p; \
unsigned long _byte = regnum*HLL_BITS/8; \
unsigned long _fb = regnum*HLL_BITS&7; \
@@ -343,7 +343,7 @@ struct hllhdr {
/* Set the value of the register at position 'regnum' to 'val'.
* 'p' is an array of unsigned bytes. */
-#define HLL_SET_REGISTER(p,regnum,val) do { \
+#define HLL_DENSE_SET_REGISTER(p,regnum,val) do { \
uint8_t *_p = (uint8_t*) p; \
unsigned long _byte = regnum*HLL_BITS/8; \
unsigned long _fb = regnum*HLL_BITS&7; \
@@ -418,20 +418,12 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
return h;
}
-/* "Add" the element in the hyperloglog data structure.
- * Actually nothing is added, but the max 0 pattern counter of the subset
- * the element belongs to is incremented if needed.
- *
- * 'registers' is expected to have room for HLL_REGISTERS plus an
- * additional byte on the right. This requirement is met by sds strings
- * automatically since they are implicitly null terminated.
- *
- * The function always succeed, however if as a result of the operation
- * the approximated cardinality changed, 1 is returned. Otherwise 0
- * is returned. */
-int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) {
+/* Given a string element to add to the HyperLogLog, returns the length
+ * of the pattern 000..1 of the element hash. As a side effect 'regp' is
+ * set to the register index this element hashes to. */
+int hllPatLen(unsigned char *ele, size_t elesize, int *regp) {
uint64_t hash, bit, index;
- uint8_t oldcount, count;
+ int count;
/* Count the number of zeroes starting from bit HLL_REGISTERS
* (that is a power of two corresponding to the first bit we don't use
@@ -445,6 +437,7 @@ int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) {
* This may sound like inefficient, but actually in the average case
* there are high probabilities to find a 1 after a few iterations. */
hash = MurmurHash64A(ele,elesize,0xadc83b19ULL);
+ index = hash & HLL_P_MASK; /* Register index. */
hash |= ((uint64_t)1<<63); /* Make sure the loop terminates. */
bit = HLL_REGISTERS; /* First bit not used to address the register. */
count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */
@@ -452,12 +445,30 @@ int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) {
count++;
bit <<= 1;
}
+ if (regp) *regp = (int) index;
+ return count;
+}
+
+/* "Add" the element in the hyperloglog data structure.
+ * Actually nothing is added, but the max 0 pattern counter of the subset
+ * the element belongs to is incremented if needed.
+ *
+ * 'registers' is expected to have room for HLL_REGISTERS plus an
+ * additional byte on the right. This requirement is met by sds strings
+ * automatically since they are implicitly null terminated.
+ *
+ * The function always succeed, however if as a result of the operation
+ * the approximated cardinality changed, 1 is returned. Otherwise 0
+ * is returned. */
+int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) {
+ uint8_t oldcount, count;
+ int index;
/* Update the register if this element produced a longer run of zeroes. */
- index = hash & HLL_P_MASK; /* Index a register inside registers. */
- HLL_GET_REGISTER(oldcount,registers,index);
+ count = hllPatLen(ele,elesize,&index);
+ HLL_DENSE_GET_REGISTER(oldcount,registers,index);
if (count > oldcount) {
- HLL_SET_REGISTER(registers,index,count);
+ HLL_DENSE_SET_REGISTER(registers,index,count);
return 1;
} else {
return 0;
@@ -525,7 +536,7 @@ uint64_t hllCount(uint8_t *registers) {
for (j = 0; j < HLL_REGISTERS; j++) {
unsigned long reg;
- HLL_GET_REGISTER(reg,registers,j);
+ HLL_DENSE_GET_REGISTER(reg,registers,j);
if (reg == 0) {
ez++;
E += 1; /* 2^(-reg[j]) is 1 when m is 0. */
@@ -719,7 +730,7 @@ void pfmergeCommand(redisClient *c) {
* to MAX(max[i],hll[i]). */
hdr = o->ptr;
for (i = 0; i < HLL_REGISTERS; i++) {
- HLL_GET_REGISTER(val,hdr->registers,i);
+ HLL_DENSE_GET_REGISTER(val,hdr->registers,i);
if (val > max[i]) max[i] = val;
}
}
@@ -743,7 +754,7 @@ void pfmergeCommand(redisClient *c) {
* invalidate the cached value. */
hdr = o->ptr;
for (j = 0; j < HLL_REGISTERS; j++) {
- HLL_SET_REGISTER(hdr->registers,j,max[j]);
+ HLL_DENSE_SET_REGISTER(hdr->registers,j,max[j]);
}
HLL_INVALIDATE_CACHE(hdr);
@@ -777,13 +788,13 @@ void pfselftestCommand(redisClient *c) {
unsigned int r = rand() & HLL_REGISTER_MAX;
bytecounters[i] = r;
- HLL_SET_REGISTER(bitcounters,i,r);
+ HLL_DENSE_SET_REGISTER(bitcounters,i,r);
}
/* Check that we are able to retrieve the same values. */
for (i = 0; i < HLL_REGISTERS; i++) {
unsigned int val;
- HLL_GET_REGISTER(val,bitcounters,i);
+ HLL_DENSE_GET_REGISTER(val,bitcounters,i);
if (val != bytecounters[i]) {
addReplyErrorFormat(c,
"TESTFAILED Register %d should be %d but is %d",
@@ -848,7 +859,7 @@ void pfgetregCommand(redisClient *c) {
for (j = 0; j < HLL_REGISTERS; j++) {
uint8_t val;
- HLL_GET_REGISTER(val,hdr->registers,j);
+ HLL_DENSE_GET_REGISTER(val,hdr->registers,j);
addReplyLongLong(c,val);
}
}