diff options
author | antirez <antirez@gmail.com> | 2019-02-18 18:27:18 +0100 |
---|---|---|
committer | antirez <antirez@gmail.com> | 2019-02-18 18:27:18 +0100 |
commit | 61a01793ed23a2b1cb3c468bd4057ddc7ba0e739 (patch) | |
tree | fb8452d44c29442a0c832d1e75f3bd4ec6da6b5e /src/dict.c | |
parent | e6948b8f2847896d3c6443744a53f7c3f79a1984 (diff) | |
download | redis-61a01793ed23a2b1cb3c468bd4057ddc7ba0e739.tar.gz |
Better distribution for set get-random-element operations.
Diffstat (limited to 'src/dict.c')
-rw-r--r-- | src/dict.c | 24 |
1 files changed, 24 insertions, 0 deletions
diff --git a/src/dict.c b/src/dict.c index 2cf9d4839..6844e6c8e 100644 --- a/src/dict.c +++ b/src/dict.c @@ -739,6 +739,30 @@ unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) { return stored; } +/* This is like dictGetRandomKey() from the POV of the API, but will do more + * work to ensure a better distribution of the returned element. + * + * This function improves the distribution because the dictGetRandomKey() + * problem is that it selects a random bucket, then it selects a random + * element from the chain in the bucket. However elements being in different + * chain lengths will have different probabilities of being reported. With + * this function instead what we do is to consider a "linear" range of the table + * that may be constituted of N buckets with chains of different lengths + * appearing one after the other. Then we report a random element in the range. + * In this way we smooth away the problem of different chain lenghts. */ +#define GETFAIR_NUM_ENTRIES 20 +dictEntry *dictGetFairRandomKey(dict *d) { + dictEntry *entries[GETFAIR_NUM_ENTRIES]; + unsigned int count = dictGetSomeKeys(d,entries,GETFAIR_NUM_ENTRIES); + /* Note that dictGetSomeKeys() may return zero elements in an unlucky + * run() even if there are actually elements inside the hash table. So + * when we get zero, we call the true dictGetRandomKey() that will always + * yeld the element if the hash table has at least one. */ + if (count == 0) return dictGetRandomKey(d); + unsigned int idx = rand() % count; + return entries[idx]; +} + /* Function to reverse bits. Algorithm from: * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */ static unsigned long rev(unsigned long v) { |