summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2014-04-09 18:56:00 +0200
committerantirez <antirez@gmail.com>2014-04-16 15:09:46 +0200
commit2b531f729dc971c725bda1cd6122ba374bd35d03 (patch)
treec059525be5f82c43989ae01f9269366e2029228b
parent437cddee0d2ad6594a31c56e67bb9a06202cb2aa (diff)
downloadredis-2b531f729dc971c725bda1cd6122ba374bd35d03.tar.gz
HyperLogLog sparse representation description and macros.
-rw-r--r--src/hyperloglog.c107
1 files changed, 104 insertions, 3 deletions
diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index aba74803a..07846ad33 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -53,7 +53,18 @@
* [2] P. Flajolet, Éric Fusy, O. Gandouet, and F. Meunier. Hyperloglog: The
* analysis of a near-optimal cardinality estimation algorithm.
*
- * The representation used by Redis is the following:
+ * Redis uses two representations:
+ *
+ * 1) A "dense" representation where every entry is represented by
+ * a 6-bit integer.
+ * 2) A "sparse" representation using run length compression suitable
+ * for representing HyperLogLogs with many registers set to 0 in
+ * a memory efficient way.
+ *
+ * Dense representation
+ * ===
+ *
+ * The dense representation used by Redis is the following:
*
* +--------+--------+--------+------// //--+----------+------+-----+
* |11000000|22221111|33333322|55444444 .... | uint64_t | HYLL | Ver |
@@ -75,7 +86,85 @@
*
* When the most significant bit in the most significant byte of the cached
* cardinality is set, it means that the data structure was modified and
- * we can't reuse the cached value that must be recomputed. */
+ * we can't reuse the cached value that must be recomputed.
+ *
+ * Sparse representation
+ * ===
+ *
+ * The sparse representation encodes registers using three possible
+ * kind of "opcodes", two composed of just one byte, and one composed
+ * of two bytes. The opcodes are called ZERO, XZERO and VAL.
+ *
+ * ZERO opcode is represented as 00xxxxxx. The 6-bit integer represented
+ * by the six bits 'xxxxxx', plus 1, means that there are N registers set
+ * to 0. This opcode can represent from 1 to 64 contiguous registers set
+ * to the value of 0.
+ *
+ * XZERO opcode is represented by two bytes 01xxxxxx yyyyyyyy. The 14-bit
+ * integer represented by the bits 'xxxxxx' as most significant bits and
+ * 'yyyyyyyy' as least significant bits, plus 1, means that there are N
+ * registers set to 0. This opcode can represent from 65 to 16384 contiguous
+ * registers set to the value of 0.
+ *
+ * VAL opcode is represented as 1vvvvxxx. It contains a 4-bit integer
+ * representing the value of a register, and a 3-bit integer representing
+ * the number of contiguous registers set to that value 'vvvv'.
+ * As with the other opcodes, to obtain the value and run length, the
+ * integers vvvv and xxx must be additioned to 1.
+ * This opcode can represent values from 1 to 16, repeated from 1 to 8 times.
+ *
+ * The sparse representation can't represent registers with a value greater
+ * than 16, however it is very unlikely that we find such a register in an
+ * HLL with a cardinality where the sparse representation is still more
+ * memory efficient than the dense representation. When this happens the
+ * HLL is converted to the dense representation.
+ *
+ * The sparse representation is purely positional. For example a sparse
+ * representation of an empty HLL is just: XZERO:16384.
+ *
+ * An HLL having only 3 non-zero registers at position 1000, 1020, 1021
+ * respectively set to 2, 3, 3, is represented by the following three
+ * opcodes:
+ *
+ * XZERO:1000 (Registers 0-999 are set to 0)
+ * VAL:2,1 (1 register set to value 2, that is register 1000)
+ * ZERO:19 (Registers 1001-1019 set to 0)
+ * VAL:3,2 (2 registers set to value 3, that is registers 1020,1021)
+ * XZERO:15362 (Registers 1022-16383 set to 0)
+ *
+ * In the example the sparse representation used just 7 bytes instead
+ * of 12k in order to represent the HLL registers. In general for low
+ * cardinality there is a big win in terms of space efficiency, traded
+ * with CPU time since the sparse representation is slower to access:
+ *
+ * The following table shows real-world space savings obtained:
+ *
+ * cardinality 1: 5 bytes (0.00244140625 bits/reg, 1 registers)
+ * cardinality 10: 31 bytes (0.01513671875 bits/reg, 10 registers)
+ * cardinality 100: 271 bytes (0.13232421875 bits/reg, 100 registers)
+ * cardinality 1000: 1906 bytes (0.9306640625 bits/reg, 971 registers)
+ * cardinality 2000: 3517 bytes (1.71728515625 bits/reg, 1888 registers)
+ * cardinality 3000: 4918 bytes (2.4013671875 bits/reg, 2745 registers)
+ * cardinality 4000: 6129 bytes (2.99267578125 bits/reg, 3552 registers)
+ * cardinality 5000: 7206 bytes (3.5185546875 bits/reg, 4297 registers)
+ * cardinality 6000: 8099 bytes (3.95458984375 bits/reg, 5013 registers)
+ * cardinality 7000: 8868 bytes (4.330078125 bits/reg, 5673 registers)
+ * cardinality 8000: 9571 bytes (4.67333984375 bits/reg, 6312 registers)
+ * cardinality 9000: 10138 bytes (4.9501953125 bits/reg, 6901 registers)
+ * cardinality 10000: 10717 bytes (5.23291015625 bits/reg, 7473 registers})
+ * cardinality 11000: 11137 bytes (5.43798828125 bits/reg, 8005 registers})
+ * cardinality 12000: 11514 bytes (5.6220703125 bits/reg, 8517 registers})
+ * cardinality 13000: 11809 bytes (5.76611328125 bits/reg, 8962 registers})
+ * cardinality 14000: 12055 bytes (5.88623046875 bits/reg, 9384 registers})
+ * cardinality 15000: 12285 bytes (5.99853515625 bits/reg, 9790 registers})
+ * cardinality 16000: 12459 bytes (6.08349609375 bits/reg, 10180 registers})
+ *
+ * At cardinality around ~16000 is when it is no longer more space efficient
+ * to use the sparse representation. However the exact maximum length of the
+ * sparse representation when this implementation switches to the dense
+ * representation is configured via the define REDIS_HLL_SPARSE_MAX and
+ * can be smaller than 12k in order to save CPU time.
+ */
#define REDIS_HLL_P 14 /* The greater is P, the smaller the error. */
#define REDIS_HLL_REGISTERS (1<<REDIS_HLL_P) /* With P=14, 16384 registers. */
@@ -88,7 +177,9 @@
/* =========================== Low level bit macros ========================= */
-/* We need to get and set 6 bit counters in an array of 8 bit bytes.
+/* Macros to access the dense representation.
+ *
+ * We need to get and set 6 bit counters in an array of 8 bit bytes.
* We use macros to make sure the code is inlined since speed is critical
* especially in order to compute the approximated cardinality in
* HLLCOUNT where we need to access all the registers at once.
@@ -237,6 +328,16 @@
_p[_byte+1] |= _v >> _fb8; \
} while(0)
+/* Macros to access the sparse representation.
+ * The macros parameter is expected to be an uint8_t pointer. */
+#define HLL_SPARSE_IS_ZERO(p) (((*p) & 0xc0) == 0) /* 00xxxxxx */
+#define HLL_SPARSE_IS_XZERO(p) (((*p) & 0xc0) == 0x40) /* 01xxxxxx */
+#define HLL_SPARSE_IS_VAL(p) ((*p) & 0x80) /* 1vvvvxxx */
+#define HLL_SPARSE_ZERO_LEN(p) ((*p) & 0x3f)
+#define HLL_SPARSE_XZERO_LEN(p) ((((*p) & 0x3f) << 6) | (*p))
+#define HLL_SPARSE_VAL_VALUE(p) (((*p) >> 3) & 0xf)
+#define HLL_SPARSE_VAL_LEN(p) ((*p) & 0x7)
+
/* ========================= HyperLogLog algorithm ========================= */
/* Our hash function is MurmurHash2, 64 bit version.