Added dictGetRandomKeys() to dict.c: mass get random entries.

This new function is useful to get a number of random entries from an hash table when we just need to do some sampling without particularly good distribution. It just jumps at a random place of the hash table and returns the first N items encountered by scanning linearly. The main usefulness of this function is to speedup Redis internal sampling of the key space, for example for key eviction or expiry.
author: antirez <antirez@gmail.com> 2014-03-20 15:50:46 +0100
committer: antirez <antirez@gmail.com> 2014-03-20 15:50:46 +0100
commit: 5317f5e99af6505048793343d94b0631b63ba029 (patch)
tree: 2c4c5ac0b7db808877a2157e6b8401429abd4416 /src/dict.c
parent: 22c9cfaf57d330dcea487aca96526fdd78401fa2 (diff)
download: redis-5317f5e99af6505048793343d94b0631b63ba029.tar.gz
1 files changed, 52 insertions, 0 deletions
diff --git a/src/dict.c b/src/dict.c
index 10e42eabf..b27920a44 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -649,6 +649,58 @@ dictEntry *dictGetRandomKey(dict *d)
     return he;
 }
 
+/* This is a version of dictGetRandomKey() that is modified in order to
+ * return multiple entries by jumping at a random place of the hash table
+ * and scanning linearly for entries.
+ *
+ * Returned pointers to hash table entries are stored into 'des' that
+ * points to an array of dictEntry pointers. The array must have room for
+ * at least 'count' elements, that is the argument we pass to the function
+ * to tell how many random elements we need.
+ *
+ * The function returns the number of items stored into 'des', that may
+ * be less than 'count' if the hash table has less than 'count' elements
+ * inside.
+ *
+ * Note that this function is not suitable when you need a good distribution
+ * of the returned items, but only when you need to "sample" a given number
+ * of continuous elements to run some kind of algorithm or to produce
+ * statistics. However the function is much faster than dictGetRandomKey()
+ * at producing N elements, and the elements are guaranteed to be non
+ * repeating. */
+int dictGetRandomKeys(dict *d, dictEntry **des, int count) {
+    int j; /* internal hash table id, 0 or 1. */
+    int stored = 0;
+
+    if (dictSize(d) < count) count = dictSize(d);
+    while(stored < count) {
+        for (j = 0; j < 2; j++) {
+            /* Pick a random point inside the hash table 0 or 1. */
+            unsigned int i = random() & d->ht[j].sizemask;
+            int size = d->ht[j].size;
+
+            /* Make sure to visit every bucket by iterating 'size' times. */
+            while(size--) {
+                dictEntry *he = d->ht[j].table[i];
+                while (he) {
+                    /* Collect all the elements of the buckets found non
+                     * empty while iterating. */
+                    *des = he;
+                    des++;
+                    he = he->next;
+                    stored++;
+                    if (stored == count) return stored;
+                }
+                i = (i+1) & d->ht[j].sizemask;
+            }
+            /* If there is only one table and we iterated it all, we should
+             * already have 'count' elements. Assert this condition. */
+            assert(dictIsRehashing(d) != 0);
+        }
+    }
+    return stored; /* Never reached. */
+}
+
 /* Function to reverse bits. Algorithm from:
  * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
 static unsigned long rev(unsigned long v) {
author	antirez <antirez@gmail.com>	2014-03-20 15:50:46 +0100
committer	antirez <antirez@gmail.com>	2014-03-20 15:50:46 +0100
commit	5317f5e99af6505048793343d94b0631b63ba029 (patch)
tree	2c4c5ac0b7db808877a2157e6b8401429abd4416 /src/dict.c
parent	22c9cfaf57d330dcea487aca96526fdd78401fa2 (diff)
download	redis-5317f5e99af6505048793343d94b0631b63ba029.tar.gz