summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2014-04-12 10:55:28 +0200
committerantirez <antirez@gmail.com>2014-04-16 15:09:46 +0200
commitdf5782f4d0da70a9a4a1f21053b3e75fa08fbf95 (patch)
treed99f02cb94a03dccaacdcaa5b14fecdb20e5ea5a
parente8723941760a9e51e97f1a196c413e1b4a68148f (diff)
downloadredis-df5782f4d0da70a9a4a1f21053b3e75fa08fbf95.tar.gz
HyperLogLog sparse to dense conversion function.
-rw-r--r--src/hyperloglog.c47
1 files changed, 44 insertions, 3 deletions
diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index a95a21c91..5e5064b1b 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -202,6 +202,8 @@ struct hllhdr {
#define HLL_SPARSE 1 /* Sparse encoding */
#define HLL_MAX_ENCODING 1
+#define HLL_SPARSE_MAX 3000
+
/* =========================== Low level bit macros ========================= */
/* Macros to access the dense representation.
@@ -555,8 +557,46 @@ double hllDenseSum(uint8_t *registers, double *PE, int *ezp) {
/* ================== Sparse representation implementation ================= */
-sds hllSparseToDense(sds *sparse) {
- return sdsnew("TODO");
+/* Convert the HLL with sparse representation given as input in its dense
+ * representation. Both representations are represented by SDS strings, and
+ * the input representation is freed as a side effect. */
+sds hllSparseToDense(sds sparse) {
+ sds dense;
+ struct hllhdr *hdr, *oldhdr = (struct hllhdr*)sparse;
+ int idx = 0, runlen, regval;
+ uint8_t *p = (uint8_t*)sparse, *end = p+sdslen(sparse);
+
+ /* Create a string of the right size filled with zero bytes.
+ * Note that the cached cardinality is set to 0 as a side effect
+ * that is exactly the cardinality of an empty HLL. */
+ dense = sdsnewlen(NULL,HLL_DENSE_SIZE);
+ hdr = (struct hllhdr*) dense;
+ *hdr = *oldhdr; /* This will copy the magic and cached cardinality. */
+ hdr->encoding = HLL_DENSE;
+
+ /* Now read the sparse representation and set non-zero registers
+ * accordingly. */
+ p += HLL_HDR_SIZE;
+ while(p < end) {
+ if (HLL_SPARSE_IS_ZERO(p)) {
+ runlen = HLL_SPARSE_ZERO_LEN(p);
+ idx += runlen;
+ } else if (HLL_SPARSE_IS_XZERO(p)) {
+ runlen = HLL_SPARSE_XZERO_LEN(p);
+ idx += runlen;
+ } else {
+ runlen = HLL_SPARSE_VAL_LEN(p);
+ regval = HLL_SPARSE_VAL_VALUE(p);
+ while(runlen--) {
+ HLL_DENSE_SET_REGISTER(hdr->registers,idx,regval);
+ idx++;
+ }
+ }
+ }
+
+ /* Free the old representation and return the new one. */
+ sdsfree(sparse);
+ return dense;
}
/* "Add" the element in the sparse hyperloglog data structure.
@@ -739,6 +779,8 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) {
int seqlen = seq-n;
int oldlen = is_xzero ? 2 : 1;
int deltalen = seqlen-oldlen;
+
+ if (deltalen > 0 && sdslen(o->ptr) > HLL_SPARSE_MAX) goto promote;
if (deltalen && next) {
memmove(next+deltalen,next,next-sparse);
sdsIncrLen(o->ptr,deltalen);
@@ -774,7 +816,6 @@ double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp) {
uint8_t *end = sparse+sparselen, *p = sparse;
while(p < end) {
- /* Set span to the number of registers covered by this opcode. */
if (HLL_SPARSE_IS_ZERO(p)) {
runlen = HLL_SPARSE_ZERO_LEN(p);
idx += runlen;