summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2015-02-06 15:48:42 +0100
committerantirez <antirez@gmail.com>2015-02-11 10:52:27 +0100
commit5792a217f85633225209557d952ad2fa6a3fa0c0 (patch)
tree79c7a4af349f314521bf5c5cf2e739108252659f
parentf25fdd6246f01b6ee3c0ce557e2911bc8c068518 (diff)
downloadredis-5792a217f85633225209557d952ad2fa6a3fa0c0.tar.gz
dict.c: add dictGetSomeKeys(), specialized for eviction.
-rw-r--r--src/dict.c94
-rw-r--r--src/dict.h1
-rw-r--r--src/redis.c2
3 files changed, 95 insertions, 2 deletions
diff --git a/src/dict.c b/src/dict.c
index c8aaf1529..8f25c14bc 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -664,7 +664,10 @@ dictEntry *dictGetRandomKey(dict *d)
return he;
}
-/* This is a version of dictGetRandomKey() that is modified in order to
+/* XXX: This is going to be removed soon and SPOP internals
+ * reimplemented.
+ *
+ * This is a version of dictGetRandomKey() that is modified in order to
* return multiple entries by jumping at a random place of the hash table
* and scanning linearly for entries.
*
@@ -735,6 +738,95 @@ unsigned int dictGetRandomKeys(dict *d, dictEntry **des, unsigned int count) {
return stored; /* Never reached. */
}
+
+/* This function samples the dictionary to return a few keys from random
+ * locations.
+ *
+ * It does not guarantee to return all the keys specified in 'count', nor
+ * it does guarantee to return non-duplicated elements, however it will make
+ * some effort to do both things.
+ *
+ * Returned pointers to hash table entries are stored into 'des' that
+ * points to an array of dictEntry pointers. The array must have room for
+ * at least 'count' elements, that is the argument we pass to the function
+ * to tell how many random elements we need.
+ *
+ * The function returns the number of items stored into 'des', that may
+ * be less than 'count' if the hash table has less than 'count' elements
+ * inside, or if not enough elements were found in a reasonable amount of
+ * steps.
+ *
+ * Note that this function is not suitable when you need a good distribution
+ * of the returned items, but only when you need to "sample" a given number
+ * of continuous elements to run some kind of algorithm or to produce
+ * statistics. However the function is much faster than dictGetRandomKey()
+ * at producing N elements. */
+unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
+ unsigned int j; /* internal hash table id, 0 or 1. */
+ unsigned int tables; /* 1 or 2 tables? */
+ unsigned int stored = 0, maxsizemask;
+ unsigned int maxsteps;
+
+ if (dictSize(d) < count) count = dictSize(d);
+ maxsteps = count*10;
+
+ /* Try to do a rehashing work proportional to 'count'. */
+ for (j = 0; j < count; j++) {
+ if (dictIsRehashing(d))
+ _dictRehashStep(d);
+ else
+ break;
+ }
+
+ tables = dictIsRehashing(d) ? 2 : 1;
+ maxsizemask = d->ht[0].sizemask;
+ if (tables > 1 && maxsizemask < d->ht[1].sizemask)
+ maxsizemask = d->ht[1].sizemask;
+
+ /* Pick a random point inside the larger table. */
+ unsigned int i = random() & maxsizemask;
+ unsigned int emptylen = 0; /* Continuous empty entries so far. */
+ while(stored < count && maxsteps--) {
+ for (j = 0; j < tables; j++) {
+ /* Invariant of the dict.c rehashing: up to the indexes already
+ * visited in ht[0] during the rehashing, there are no populated
+ * buckets, so we can skip ht[0] for indexes between 0 and idx-1. */
+ if (tables == 2 && j == 0 && i < d->rehashidx) {
+ /* Moreover, if we are currently out of range in the second
+ * table, there will be no elements in both tables up to
+ * the current rehashing index, so we jump if possible.
+ * (this happens when going from big to small table). */
+ if (i >= d->ht[1].size) i = d->rehashidx;
+ continue;
+ }
+ if (i >= d->ht[j].size) continue; /* Out of range for this table. */
+ dictEntry *he = d->ht[j].table[i];
+
+ /* Count contiguous empty buckets, and jump to other
+ * locations if they reach 'count' (with a minimum of 5). */
+ if (he == NULL) {
+ emptylen++;
+ if (emptylen >= 5 && emptylen > count) {
+ i = random() & maxsizemask;
+ emptylen = 0;
+ }
+ } else {
+ while (he) {
+ /* Collect all the elements of the buckets found non
+ * empty while iterating. */
+ *des = he;
+ des++;
+ he = he->next;
+ stored++;
+ if (stored == count) return stored;
+ }
+ }
+ }
+ i = (i+1) & maxsizemask;
+ }
+ return stored;
+}
+
/* Function to reverse bits. Algorithm from:
* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
static unsigned long rev(unsigned long v) {
diff --git a/src/dict.h b/src/dict.h
index 7421078f8..5959a57b9 100644
--- a/src/dict.h
+++ b/src/dict.h
@@ -164,6 +164,7 @@ dictIterator *dictGetSafeIterator(dict *d);
dictEntry *dictNext(dictIterator *iter);
void dictReleaseIterator(dictIterator *iter);
dictEntry *dictGetRandomKey(dict *d);
+unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
unsigned int dictGetRandomKeys(dict *d, dictEntry **des, unsigned int count);
void dictPrintStats(dict *d);
unsigned int dictGenHashFunction(const void *key, int len);
diff --git a/src/redis.c b/src/redis.c
index b2f9ffc68..a1d8375e6 100644
--- a/src/redis.c
+++ b/src/redis.c
@@ -3148,7 +3148,7 @@ void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEn
}
#if 1 /* Use bulk get by default. */
- count = dictGetRandomKeys(sampledict,samples,server.maxmemory_samples);
+ count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
#else
count = server.maxmemory_samples;
for (j = 0; j < count; j++) samples[j] = dictGetRandomKey(sampledict);