summary refs log tree commit diff
path: root/src/evict.c
diff options
context:
space:
mode:
author antirez <antirez@gmail.com> 2016-07-06 15:28:18 +0200
committer antirez <antirez@gmail.com> 2016-07-06 15:28:18 +0200
commit 4a140d320f7a8e1a63f9c3ca588f61f21838f3ac (patch)
tree 0257375a8b7f37bc568c8a46c309369a8b485783 /src/evict.c
parent b46239e58b00774d121de89e0e033b2ed3181eb0 (diff)
download redis-4a140d320f7a8e1a63f9c3ca588f61f21838f3ac.tar.gz
Add expire.c and evict.c.
Diffstat (limited to 'src/evict.c')
-rw-r--r-- src/evict.c 364
1 files changed, 364 insertions, 0 deletions
diff --git a/src/evict.c b/src/evict.c
new file mode 100644
index 000000000..c35b10b8f
--- /dev/null
+++ b/src/evict.c
@@ -0,0 +1,364 @@
+/* Maxmemory directive handling (LRU eviction and other policies).
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "bio.h"
+
+/* Return the current LRU clock: the value of mstime() scaled down by the
+ * clock resolution and masked to a reduced-bits value that can be used to
+ * set and check the object->lru field of redisObject structures. */
+unsigned int getLRUClock(void) {
+    return (unsigned int)((mstime() / LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX);
+}
+
+/* Estimate, in milliseconds, the minimum time elapsed since the object was
+ * last requested, using the approximated LRU clock stored in o->lru. */
+unsigned long long estimateObjectIdleTime(robj *o) {
+    unsigned long long now = LRU_CLOCK();
+    unsigned long long ticks;
+
+    /* A clock behind o->lru is handled as a wrap around LRU_CLOCK_MAX. */
+    if (now < o->lru) ticks = now + (LRU_CLOCK_MAX - o->lru);
+    else ticks = now - o->lru;
+    return ticks * LRU_CLOCK_RESOLUTION;
+}
+
+/* freeMemoryIfNeeded() gets called when 'maxmemory' is set on the config
+ * file to limit the max memory used by the server, before processing a
+ * command.
+ *
+ * The goal of the function is to free enough memory to keep Redis under the
+ * configured memory limit.
+ *
+ * The function starts by calculating how many bytes should be freed to keep
+ * Redis under the limit, and enters a loop selecting the best keys to
+ * evict according to the configured policy.
+ *
+ * If all the bytes needed to return back under the limit were freed the
+ * function returns C_OK, otherwise C_ERR is returned, and the caller
+ * should block the execution of commands that will result in more memory
+ * used by the server.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * LRU approximation algorithm
+ *
+ * Redis uses an approximation of the LRU algorithm that runs in constant
+ * memory. Every time there is a key to evict, we sample N keys (with
+ * N very small, usually around 5) to populate a pool of the M best keys
+ * to evict (the pool size is defined by MAXMEMORY_EVICTION_POOL_SIZE).
+ *
+ * The N keys sampled are added to the pool of good keys to evict (the ones
+ * with an old access time) if they are better than one of the current keys
+ * in the pool.
+ *
+ * After the pool is populated, the best key we have in the pool is evicted.
+ * However note that we don't remove keys from the pool when they are deleted
+ * so the pool may contain keys that no longer exist.
+ *
+ * When we try to evict a key, and all the entries in the pool don't exist
+ * we populate it again. This time we'll be sure that the pool has at least
+ * one key that can be evicted, if there is at least one key that can be
+ * evicted in the whole database. */
+
+/* Allocate an eviction pool of MAXMEMORY_EVICTION_POOL_SIZE empty entries. */
+struct evictionPoolEntry *evictionPoolAlloc(void) {
+    struct evictionPoolEntry *pool, *slot;
+
+    pool = zmalloc(MAXMEMORY_EVICTION_POOL_SIZE * sizeof(*pool));
+    /* Mark every slot as empty: no key and zero idle time. */
+    for (slot = pool; slot != pool + MAXMEMORY_EVICTION_POOL_SIZE; slot++) {
+        slot->idle = 0;
+        slot->key = NULL;
+    }
+    return pool;
+}
+
+/* This is a helper function for freeMemoryIfNeeded(): it is used in order
+ * to populate the evictionPool with a few entries every time we want to
+ * evict a key. Keys with idle time greater than one of the current
+ * keys are added. Keys are always added if there are free entries.
+ *
+ * We insert keys in place in ascending order, so keys with the smaller
+ * idle time are on the left, and keys with the higher idle time on the
+ * right.
+ *
+ * 'sampledict' is the dictionary the candidate keys are sampled from,
+ * 'keydict' is the dictionary used to look up the value object of each
+ * sampled key, and 'pool' is the array of MAXMEMORY_EVICTION_POOL_SIZE
+ * entries to update. */
+
+#define EVICTION_SAMPLES_ARRAY_SIZE 16
+void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
+ int j, k, count;
+ dictEntry *_samples[EVICTION_SAMPLES_ARRAY_SIZE];
+ dictEntry **samples;
+
+ /* Try to use the fixed-size on-stack buffer: this function is a big hit...
+ * Note: it was actually measured that this helps. The heap is used only
+ * when more than EVICTION_SAMPLES_ARRAY_SIZE samples are requested. */
+ if (server.maxmemory_samples <= EVICTION_SAMPLES_ARRAY_SIZE) {
+ samples = _samples;
+ } else {
+ samples = zmalloc(sizeof(samples[0])*server.maxmemory_samples);
+ }
+
+ count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
+ for (j = 0; j < count; j++) {
+ unsigned long long idle;
+ sds key;
+ robj *o;
+ dictEntry *de;
+
+ de = samples[j];
+ key = dictGetKey(de);
+ /* If the dictionary we are sampling from is not the main
+ * dictionary (but the expires one) we need to lookup the key
+ * again in the key dictionary to obtain the value object. */
+ if (sampledict != keydict) de = dictFind(keydict, key);
+ o = dictGetVal(de);
+ idle = estimateObjectIdleTime(o);
+
+ /* Insert the element inside the pool.
+ * First, find the first empty bucket or the first populated
+ * bucket whose idle time is not smaller than our idle time
+ * (k ends up equal to the pool size if there is no such bucket). */
+ k = 0;
+ while (k < MAXMEMORY_EVICTION_POOL_SIZE &&
+ pool[k].key &&
+ pool[k].idle < idle) k++;
+ if (k == 0 && pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key != NULL) {
+ /* Can't insert if the element is not more idle than the least
+ * idle element we have and there are no empty buckets. */
+ continue;
+ } else if (k < MAXMEMORY_EVICTION_POOL_SIZE && pool[k].key == NULL) {
+ /* Inserting into empty position. No setup needed before insert. */
+ } else {
+ /* Inserting in the middle. Now k points to the first element
+ * greater than or equal to the element to insert, or one past
+ * the end of the pool if every element is smaller. */
+ if (pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key == NULL) {
+ /* Free space on the right? Insert at k shifting
+ * all the elements from k to end to the right. */
+ memmove(pool+k+1,pool+k,
+ sizeof(pool[0])*(MAXMEMORY_EVICTION_POOL_SIZE-k-1));
+ } else {
+ /* No free space on right? Insert at k-1 */
+ k--;
+ /* Shift all elements on the left of k (included) to the
+ * left, so we discard the element with smaller idle time.
+ * The key string of the discarded element (pool[0]) is freed
+ * first, since the shift overwrites that slot. */
+ sdsfree(pool[0].key);
+ memmove(pool,pool+1,sizeof(pool[0])*k);
+ }
+ }
+ /* Store the result of sdsdup() on the sampled key, not the sampled
+ * key pointer itself, together with the idle time. */
+ pool[k].key = sdsdup(key);
+ pool[k].idle = idle;
+ }
+ /* Release the samples array only if it was allocated with zmalloc(). */
+ if (samples != _samples) zfree(samples);
+}
+
+/* Try to bring the memory usage back under server.maxmemory by evicting
+ * keys according to server.maxmemory_policy. The slaves output buffers and
+ * the AOF buffers are not counted as used memory.
+ *
+ * Returns C_OK if the memory usage is already under the limit, or if enough
+ * memory was freed by evicting keys. Returns C_ERR if the policy forbids
+ * eviction or no more keys can be evicted: in this case the function first
+ * waits for pending lazy free jobs, but still returns C_ERR. */
+int freeMemoryIfNeeded(void) {
+    size_t mem_reported, mem_used, mem_tofree, mem_freed;
+    int slaves = listLength(server.slaves);
+    mstime_t latency, eviction_latency;
+    long long delta;
+
+    /* Check if we are over the memory usage limit. If we are not, no need
+     * to subtract the slaves output buffers. We can just return ASAP. */
+    mem_reported = zmalloc_used_memory();
+    if (mem_reported <= server.maxmemory) return C_OK;
+
+    /* Remove the size of slaves output buffers and AOF buffers from the
+     * count of used memory. Every subtraction is clamped at zero, since
+     * mem_used is unsigned and would otherwise wrap around to a huge value,
+     * causing a lot of keys to be evicted for no reason. */
+    mem_used = mem_reported;
+    if (slaves) {
+        listIter li;
+        listNode *ln;
+
+        listRewind(server.slaves,&li);
+        while((ln = listNext(&li))) {
+            client *slave = listNodeValue(ln);
+            unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
+            if (obuf_bytes > mem_used)
+                mem_used = 0;
+            else
+                mem_used -= obuf_bytes;
+        }
+    }
+    if (server.aof_state != AOF_OFF) {
+        size_t aof_bytes = sdslen(server.aof_buf);
+
+        mem_used = (aof_bytes > mem_used) ? 0 : mem_used - aof_bytes;
+        aof_bytes = aofRewriteBufferSize();
+        mem_used = (aof_bytes > mem_used) ? 0 : mem_used - aof_bytes;
+    }
+
+    /* Check if we are still over the memory limit. */
+    if (mem_used <= server.maxmemory) return C_OK;
+
+    /* Compute how much memory we need to free. */
+    mem_tofree = mem_used - server.maxmemory;
+    mem_freed = 0;
+
+    if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
+        goto cant_free; /* We need to free memory, but policy forbids. */
+
+    latencyStartMonitor(latency);
+    while (mem_freed < mem_tofree) {
+        int j, k, keys_freed = 0;
+
+        /* Each iteration tries to evict one key from every database. */
+        for (j = 0; j < server.dbnum; j++) {
+            long bestval = 0; /* just to prevent warning */
+            sds bestkey = NULL;
+            dictEntry *de;
+            redisDb *db = server.db+j;
+            dict *dict;
+
+            /* The allkeys-* policies pick candidates from the main
+             * dictionary, all the other ones from the expires dictionary. */
+            if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU ||
+                server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM)
+            {
+                dict = server.db[j].dict;
+            } else {
+                dict = server.db[j].expires;
+            }
+            if (dictSize(dict) == 0) continue;
+
+            /* volatile-random and allkeys-random policy */
+            if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
+                server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
+            {
+                de = dictGetRandomKey(dict);
+                bestkey = dictGetKey(de);
+            }
+
+            /* volatile-lru and allkeys-lru policy */
+            else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU ||
+                server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU)
+            {
+                struct evictionPoolEntry *pool = db->eviction_pool;
+
+                while(bestkey == NULL) {
+                    evictionPoolPopulate(dict, db->dict, db->eviction_pool);
+                    /* Go backward from best to worst element to evict. */
+                    for (k = MAXMEMORY_EVICTION_POOL_SIZE-1; k >= 0; k--) {
+                        if (pool[k].key == NULL) continue;
+                        de = dictFind(dict,pool[k].key);
+
+                        /* Remove the entry from the pool. */
+                        sdsfree(pool[k].key);
+                        /* Shift all elements on its right to left. */
+                        memmove(pool+k,pool+k+1,
+                            sizeof(pool[0])*(MAXMEMORY_EVICTION_POOL_SIZE-k-1));
+                        /* Clear the element on the right which is empty
+                         * since we shifted one position to the left. */
+                        pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key = NULL;
+                        pool[MAXMEMORY_EVICTION_POOL_SIZE-1].idle = 0;
+
+                        /* If the key exists, is our pick. Otherwise it is
+                         * a ghost and we need to try the next element. */
+                        if (de) {
+                            bestkey = dictGetKey(de);
+                            break;
+                        } else {
+                            /* Ghost... */
+                            continue;
+                        }
+                    }
+                }
+            }
+
+            /* volatile-ttl */
+            else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
+                for (k = 0; k < server.maxmemory_samples; k++) {
+                    sds thiskey;
+                    long thisval;
+
+                    de = dictGetRandomKey(dict);
+                    thiskey = dictGetKey(de);
+                    thisval = (long) dictGetVal(de);
+
+                    /* Expire sooner (minor expire unix timestamp) is better
+                     * candidate for deletion */
+                    if (bestkey == NULL || thisval < bestval) {
+                        bestkey = thiskey;
+                        bestval = thisval;
+                    }
+                }
+            }
+
+            /* Finally remove the selected key. */
+            if (bestkey) {
+                robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
+                propagateExpire(db,keyobj,server.lazyfree_lazy_eviction);
+                /* We compute the amount of memory freed by db*Delete() alone.
+                 * It is possible that actually the memory needed to propagate
+                 * the DEL in AOF and replication link is greater than the one
+                 * we are freeing removing the key, but we can't account for
+                 * that otherwise we would never exit the loop.
+                 *
+                 * AOF and Output buffer memory will be freed eventually so
+                 * we only care about memory used by the key space. */
+                delta = (long long) zmalloc_used_memory();
+                latencyStartMonitor(eviction_latency);
+                if (server.lazyfree_lazy_eviction)
+                    dbAsyncDelete(db,keyobj);
+                else
+                    dbSyncDelete(db,keyobj);
+                latencyEndMonitor(eviction_latency);
+                latencyAddSampleIfNeeded("eviction-del",eviction_latency);
+                latencyRemoveNestedEvent(latency,eviction_latency);
+                delta -= (long long) zmalloc_used_memory();
+                /* Note: delta is signed while mem_freed is unsigned, so a
+                 * negative delta bigger in magnitude than mem_freed makes
+                 * mem_freed wrap around. */
+                mem_freed += delta;
+                server.stat_evictedkeys++;
+                notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
+                    keyobj, db->id);
+                decrRefCount(keyobj);
+                keys_freed++;
+
+                /* When the memory to free starts to be big enough, we may
+                 * start spending so much time here that is impossible to
+                 * deliver data to the slaves fast enough, so we force the
+                 * transmission here inside the loop. */
+                if (slaves) flushSlavesOutputBuffers();
+            }
+        }
+        if (!keys_freed) {
+            latencyEndMonitor(latency);
+            latencyAddSampleIfNeeded("eviction-cycle",latency);
+            goto cant_free; /* nothing to free... */
+        }
+    }
+    latencyEndMonitor(latency);
+    latencyAddSampleIfNeeded("eviction-cycle",latency);
+    return C_OK;
+
+cant_free:
+    /* We are here if we are not able to reclaim memory. There is only one
+     * last thing we can try: check if the lazyfree thread has jobs in queue
+     * and wait... */
+    while(bioPendingJobsOfType(BIO_LAZY_FREE)) {
+        if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree)
+            break;
+        usleep(1000);
+    }
+    return C_ERR;
+}
+