summaryrefslogtreecommitdiff
path: root/src/dict.c
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2019-02-18 18:27:18 +0100
committerantirez <antirez@gmail.com>2019-02-18 18:27:18 +0100
commit61a01793ed23a2b1cb3c468bd4057ddc7ba0e739 (patch)
treefb8452d44c29442a0c832d1e75f3bd4ec6da6b5e /src/dict.c
parente6948b8f2847896d3c6443744a53f7c3f79a1984 (diff)
downloadredis-61a01793ed23a2b1cb3c468bd4057ddc7ba0e739.tar.gz
Better distribution for set get-random-element operations.
Diffstat (limited to 'src/dict.c')
-rw-r--r--src/dict.c24
1 files changed, 24 insertions, 0 deletions
diff --git a/src/dict.c b/src/dict.c
index 2cf9d4839..6844e6c8e 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -739,6 +739,30 @@ unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
return stored;
}
+/* This is like dictGetRandomKey() from the POV of the API, but will do more
+ * work to ensure a better distribution of the returned element.
+ *
+ * This function improves the distribution because the dictGetRandomKey()
+ * problem is that it selects a random bucket, then it selects a random
+ * element from the chain in the bucket. However elements being in different
+ * chain lengths will have different probabilities of being reported. With
+ * this function instead what we do is to consider a "linear" range of the table
+ * that may be constituted of N buckets with chains of different lengths
+ * appearing one after the other. Then we report a random element in the range.
+ * In this way we smooth away the problem of different chain lenghts. */
+#define GETFAIR_NUM_ENTRIES 20
+dictEntry *dictGetFairRandomKey(dict *d) {
+ dictEntry *entries[GETFAIR_NUM_ENTRIES];
+ unsigned int count = dictGetSomeKeys(d,entries,GETFAIR_NUM_ENTRIES);
+ /* Note that dictGetSomeKeys() may return zero elements in an unlucky
+ * run() even if there are actually elements inside the hash table. So
+ * when we get zero, we call the true dictGetRandomKey() that will always
+ * yeld the element if the hash table has at least one. */
+ if (count == 0) return dictGetRandomKey(d);
+ unsigned int idx = rand() % count;
+ return entries[idx];
+}
+
/* Function to reverse bits. Algorithm from:
* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
static unsigned long rev(unsigned long v) {