diff options
author | antirez <antirez@gmail.com> | 2014-04-17 18:02:45 +0200 |
---|---|---|
committer | antirez <antirez@gmail.com> | 2014-04-17 18:05:27 +0200 |
commit | 5eb7ac0c920336b5711f125b6dcc627d89b9f970 (patch) | |
tree | 55648c115c2809946ac2f113484afe3a446eca74 /src/hyperloglog.c | |
parent | 192a21327469cf3b01fd8bb17581dfda05a772f7 (diff) | |
download | redis-5eb7ac0c920336b5711f125b6dcc627d89b9f970.tar.gz |
Speedup hllRawSum() processing 8 bytes per iteration.
The internal HLL raw encoding used by PFCOUNT when merging multiple keys
is aligned to 8 bits (1 byte per register) so we can exploit this to
improve performances by processing multiple bytes per iteration.
In benchmarks the new code was several times faster with HLLs with many
registers set to zero, while no slowdown was observed with populated
HLLs.
Diffstat (limited to 'src/hyperloglog.c')
-rw-r--r-- | src/hyperloglog.c | 22 |
1 files changed, 15 insertions, 7 deletions
diff --git a/src/hyperloglog.c b/src/hyperloglog.c index c951c09f7..1c6ed45f3 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -928,16 +928,24 @@ double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp, int *i double hllRawSum(uint8_t *registers, double *PE, int *ezp) { double E = 0; int j, ez = 0; - unsigned long reg; + uint64_t *word = (uint64_t*) registers; + uint8_t *bytes; - for (j = 0; j < HLL_REGISTERS; j++) { - reg = registers[j]; - if (reg == 0) { - ez++; - /* Increment E at the end of the loop. */ + for (j = 0; j < HLL_REGISTERS/8; j++) { + if (*word == 0) { + ez += 8; } else { - E += PE[reg]; /* Precomputed 2^(-reg[j]). */ + bytes = (uint8_t*) word; + if (bytes[0]) E += PE[bytes[0]]; else ez++; + if (bytes[1]) E += PE[bytes[1]]; else ez++; + if (bytes[2]) E += PE[bytes[2]]; else ez++; + if (bytes[3]) E += PE[bytes[3]]; else ez++; + if (bytes[4]) E += PE[bytes[4]]; else ez++; + if (bytes[5]) E += PE[bytes[5]]; else ez++; + if (bytes[6]) E += PE[bytes[6]]; else ez++; + if (bytes[7]) E += PE[bytes[7]]; else ez++; } + word++; } E += ez; /* 2^(-reg[j]) is 1 when m is 0, add it 'ez' times for every zero register in the HLL. */ |