summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2014-04-17 18:02:45 +0200
committerantirez <antirez@gmail.com>2014-04-17 18:05:27 +0200
commit5eb7ac0c920336b5711f125b6dcc627d89b9f970 (patch)
tree55648c115c2809946ac2f113484afe3a446eca74
parent192a21327469cf3b01fd8bb17581dfda05a772f7 (diff)
downloadredis-5eb7ac0c920336b5711f125b6dcc627d89b9f970.tar.gz
Speedup hllRawSum() processing 8 bytes per iteration.
The internal HLL raw encoding used by PFCOUNT when merging multiple keys is aligned to 8 bits (1 byte per register) so we can exploit this to improve performances by processing multiple bytes per iteration. In benchmarks the new code was several times faster with HLLs with many registers set to zero, while no slowdown was observed with populated HLLs.
-rw-r--r--src/hyperloglog.c22
1 files changed, 15 insertions, 7 deletions
diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index c951c09f7..1c6ed45f3 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -928,16 +928,24 @@ double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp, int *i
double hllRawSum(uint8_t *registers, double *PE, int *ezp) {
double E = 0;
int j, ez = 0;
- unsigned long reg;
+ uint64_t *word = (uint64_t*) registers;
+ uint8_t *bytes;
- for (j = 0; j < HLL_REGISTERS; j++) {
- reg = registers[j];
- if (reg == 0) {
- ez++;
- /* Increment E at the end of the loop. */
+ for (j = 0; j < HLL_REGISTERS/8; j++) {
+ if (*word == 0) {
+ ez += 8;
} else {
- E += PE[reg]; /* Precomputed 2^(-reg[j]). */
+ bytes = (uint8_t*) word;
+ if (bytes[0]) E += PE[bytes[0]]; else ez++;
+ if (bytes[1]) E += PE[bytes[1]]; else ez++;
+ if (bytes[2]) E += PE[bytes[2]]; else ez++;
+ if (bytes[3]) E += PE[bytes[3]]; else ez++;
+ if (bytes[4]) E += PE[bytes[4]]; else ez++;
+ if (bytes[5]) E += PE[bytes[5]]; else ez++;
+ if (bytes[6]) E += PE[bytes[6]]; else ez++;
+ if (bytes[7]) E += PE[bytes[7]]; else ez++;
}
+ word++;
}
E += ez; /* 2^(-reg[j]) is 1 when m is 0, add it 'ez' times for every
zero register in the HLL. */