diff options
author | Jim Brunner <brunnerj@amazon.com> | 2020-09-15 23:16:01 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-09-16 09:16:01 +0300 |
commit | 810e28a397b77fa16037354477370b564474d9be (patch) | |
tree | 8a82cf2b83bbf051e18a5357697d0f6425525e7f /src | |
parent | 6ff741b5cf4d1cf7e23610aa9eef7ed485bd3113 (diff) | |
download | redis-810e28a397b77fa16037354477370b564474d9be.tar.gz |
Incremental eviction processing (#7653)
Rather than blindly evicting until maxmemory limit is achieved, this
update adds a time limit to eviction. While over the maxmemory limit,
eviction will process before each command AND as a timeProc when no
commands are running.
This will reduce the latency impact on many cases, especially pathological
cases like massive used memory increase during dict rehashing.
There is a risk that some other edge cases (like massive pipelined use
of MGET) could cause Redis memory usage to keep growing despite the
eviction attempts, so a new maxmemory-eviction-tenacity config is
introduced to let users mitigate that.
Diffstat (limited to 'src')
-rw-r--r-- | src/config.c | 3 | ||||
-rw-r--r-- | src/evict.c | 227 | ||||
-rw-r--r-- | src/networking.c | 6 | ||||
-rw-r--r-- | src/server.c | 6 | ||||
-rw-r--r-- | src/server.h | 8 |
5 files changed, 154 insertions, 96 deletions
diff --git a/src/config.c b/src/config.c index 64a31fc67..be0494ccd 100644 --- a/src/config.c +++ b/src/config.c @@ -2139,7 +2139,7 @@ static int updateMaxmemory(long long val, long long prev, char **err) { if ((unsigned long long)val < used) { serverLog(LL_WARNING,"WARNING: the new maxmemory value set via CONFIG SET (%llu) is smaller than the current memory usage (%zu). This will result in key eviction and/or the inability to accept new write commands depending on the maxmemory-policy.", server.maxmemory, used); } - freeMemoryIfNeededAndSafe(); + performEvictions(); } return 1; } @@ -2327,6 +2327,7 @@ standardConfig configs[] = { createIntConfig("replica-priority", "slave-priority", MODIFIABLE_CONFIG, 0, INT_MAX, server.slave_priority, 100, INTEGER_CONFIG, NULL, NULL), createIntConfig("repl-diskless-sync-delay", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.repl_diskless_sync_delay, 5, INTEGER_CONFIG, NULL, NULL), createIntConfig("maxmemory-samples", NULL, MODIFIABLE_CONFIG, 1, INT_MAX, server.maxmemory_samples, 5, INTEGER_CONFIG, NULL, NULL), + createIntConfig("maxmemory-eviction-tenacity", NULL, MODIFIABLE_CONFIG, 0, 100, server.maxmemory_eviction_tenacity, 10, INTEGER_CONFIG, NULL, NULL), createIntConfig("timeout", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.maxidletime, 0, INTEGER_CONFIG, NULL, NULL), /* Default client timeout: infinite */ createIntConfig("replica-announce-port", "slave-announce-port", MODIFIABLE_CONFIG, 0, 65535, server.slave_announce_port, 0, INTEGER_CONFIG, NULL, NULL), createIntConfig("tcp-backlog", NULL, IMMUTABLE_CONFIG, 0, INT_MAX, server.tcp_backlog, 511, INTEGER_CONFIG, NULL, NULL), /* TCP listen backlog. */ diff --git a/src/evict.c b/src/evict.c index 5d398c6c9..258e1a25e 100644 --- a/src/evict.c +++ b/src/evict.c @@ -33,13 +33,14 @@ #include "server.h" #include "bio.h" #include "atomicvar.h" +#include <math.h> /* ---------------------------------------------------------------------------- * Data structures * --------------------------------------------------------------------------*/ /* To improve the quality of the LRU approximation we take a set of keys - * that are good candidate for eviction across freeMemoryIfNeeded() calls. + * that are good candidate for eviction across performEvictions() calls. * * Entries inside the eviction pool are taken ordered by idle time, putting * greater idle times to the right (ascending order). @@ -97,25 +98,7 @@ unsigned long long estimateObjectIdleTime(robj *o) { } } -/* freeMemoryIfNeeded() gets called when 'maxmemory' is set on the config - * file to limit the max memory used by the server, before processing a - * command. - * - * The goal of the function is to free enough memory to keep Redis under the - * configured memory limit. - * - * The function starts calculating how many bytes should be freed to keep - * Redis under the limit, and enters a loop selecting the best keys to - * evict accordingly to the configured policy. - * - * If all the bytes needed to return back under the limit were freed the - * function returns C_OK, otherwise C_ERR is returned, and the caller - * should block the execution of commands that will result in more memory - * used by the server. - * - * ------------------------------------------------------------------------ - * - * LRU approximation algorithm +/* LRU approximation algorithm * * Redis uses an approximation of the LRU algorithm that runs in constant * memory. Every time there is a key to expire, we sample N keys (with @@ -150,7 +133,7 @@ void evictionPoolAlloc(void) { EvictionPoolLRU = ep; } -/* This is an helper function for freeMemoryIfNeeded(), it is used in order +/* This is an helper function for performEvictions(), it is used in order * to populate the evictionPool with a few entries every time we want to * expire a key. Keys with idle time smaller than one of the current * keys are added. Keys are always added if there are free entries. @@ -341,11 +324,6 @@ unsigned long LFUDecrAndReturn(robj *o) { return counter; } -/* ---------------------------------------------------------------------------- - * The external API for eviction: freeMemoryIfNeeded() is called by the - * server when there is data to add in order to make space if needed. - * --------------------------------------------------------------------------*/ - /* We don't want to count AOF buffers and slaves output buffers as * used memory: the eviction should use mostly data size. This function * returns the sum of AOF and slaves buffer. */ @@ -434,39 +412,113 @@ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *lev return C_ERR; } -/* This function is periodically called to see if there is memory to free - * according to the current "maxmemory" settings. In case we are over the - * memory limit, the function will try to free some memory to return back - * under the limit. - * - * The function returns C_OK if we are under the memory limit or if we - * were over the limit, but the attempt to free memory was successful. - * Otherwise if we are over the memory limit, but not enough memory - * was freed to return back under the limit, the function returns C_ERR. */ -int freeMemoryIfNeeded(void) { - int keys_freed = 0; + +/* The evictionTimeProc is started when "maxmemory" has been breached and + * could not immediately be resolved. This will spin the event loop with short + * eviction cycles until the "maxmemory" condition has resolved or there are no + * more evictable items. */ +static int isEvictionProcRunning = 0; +static int evictionTimeProc( + struct aeEventLoop *eventLoop, long long id, void *clientData) { + UNUSED(eventLoop); + UNUSED(id); + UNUSED(clientData); + + if (performEvictions() == EVICT_RUNNING) return 0; /* keep evicting */ + + /* For EVICT_OK - things are good, no need to keep evicting. + * For EVICT_FAIL - there is nothing left to evict. */ + isEvictionProcRunning = 0; + return AE_NOMORE; +} + +/* Check if it's safe to perform evictions. + * Returns 1 if evictions can be performed + * Returns 0 if eviction processing should be skipped + */ +static int isSafeToPerformEvictions(void) { + /* - There must be no script in timeout condition. + * - Nor we are loading data right now. */ + if (server.lua_timedout || server.loading) return 0; + /* By default replicas should ignore maxmemory * and just be masters exact copies. */ - if (server.masterhost && server.repl_slave_ignore_maxmemory) return C_OK; + if (server.masterhost && server.repl_slave_ignore_maxmemory) return 0; + + /* When clients are paused the dataset should be static not just from the + * POV of clients not being able to write, but also from the POV of + * expires and evictions of keys not being performed. */ + if (clientsArePaused()) return 0; + + return 1; +} + +/* Algorithm for converting tenacity (0-100) to a time limit. */ +static unsigned long evictionTimeLimitUs() { + serverAssert(server.maxmemory_eviction_tenacity >= 0); + serverAssert(server.maxmemory_eviction_tenacity <= 100); + + if (server.maxmemory_eviction_tenacity <= 10) { + /* A linear progression from 0..500us */ + return 50uL * server.maxmemory_eviction_tenacity; + } + if (server.maxmemory_eviction_tenacity < 100) { + /* A 15% geometric progression, resulting in a limit of ~2 min at tenacity==99 */ + return (unsigned long)(500.0 * pow(1.15, server.maxmemory_eviction_tenacity - 10.0)); + } + + return ULONG_MAX; /* No limit to eviction time */ +} + +/* Check that memory usage is within the current "maxmemory" limit. If over + * "maxmemory", attempt to free memory by evicting data (if it's safe to do so). + * + * It's possible for Redis to suddenly be significantly over the "maxmemory" + * setting. This can happen if there is a large allocation (like a hash table + * resize) or even if the "maxmemory" setting is manually adjusted. Because of + * this, it's important to evict for a managed period of time - otherwise Redis + * would become unresponsive while evicting. + * + * The goal of this function is to improve the memory situation - not to + * immediately resolve it. In the case that some items have been evicted but + * the "maxmemory" limit has not been achieved, an aeTimeProc will be started + * which will continue to evict items until memory limits are achieved or + * nothing more is evictable. + * + * This should be called before execution of commands. If EVICT_FAIL is + * returned, commands which will result in increased memory usage should be + * rejected. + * + * Returns: + * EVICT_OK - memory is OK or it's not possible to perform evictions now + * EVICT_RUNNING - memory is over the limit, but eviction is still processing + * EVICT_FAIL - memory is over the limit, and there's nothing to evict + * */ +int performEvictions(void) { + if (!isSafeToPerformEvictions()) return EVICT_OK; + + int keys_freed = 0; size_t mem_reported, mem_tofree, mem_freed; - mstime_t latency, eviction_latency, lazyfree_latency; + mstime_t latency, eviction_latency; long long delta; int slaves = listLength(server.slaves); - int result = C_ERR; + int result = EVICT_FAIL; - /* When clients are paused the dataset should be static not just from the - * POV of clients not being able to write, but also from the POV of - * expires and evictions of keys not being performed. */ - if (clientsArePaused()) return C_OK; if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK) - return C_OK; + return EVICT_OK; + + if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION) + return EVICT_FAIL; /* We need to free memory, but policy forbids. */ + + unsigned long eviction_time_limit_us = evictionTimeLimitUs(); mem_freed = 0; latencyStartMonitor(latency); - if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION) - goto cant_free; /* We need to free memory, but policy forbids. */ + + monotime evictionTimer; + elapsedStart(&evictionTimer); while (mem_freed < mem_tofree) { int j, k, i; @@ -581,60 +633,61 @@ int freeMemoryIfNeeded(void) { decrRefCount(keyobj); keys_freed++; - /* When the memory to free starts to be big enough, we may - * start spending so much time here that is impossible to - * deliver data to the slaves fast enough, so we force the - * transmission here inside the loop. */ - if (slaves) flushSlavesOutputBuffers(); - - /* Normally our stop condition is the ability to release - * a fixed, pre-computed amount of memory. However when we - * are deleting objects in another thread, it's better to - * check, from time to time, if we already reached our target - * memory, since the "mem_freed" amount is computed only - * across the dbAsyncDelete() call, while the thread can - * release the memory all the time. */ - if (server.lazyfree_lazy_eviction && !(keys_freed % 16)) { - if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) { - /* Let's satisfy our stop condition. */ - mem_freed = mem_tofree; + if (keys_freed % 16 == 0) { + /* When the memory to free starts to be big enough, we may + * start spending so much time here that is impossible to + * deliver data to the replicas fast enough, so we force the + * transmission here inside the loop. */ + if (slaves) flushSlavesOutputBuffers(); + + /* Normally our stop condition is the ability to release + * a fixed, pre-computed amount of memory. However when we + * are deleting objects in another thread, it's better to + * check, from time to time, if we already reached our target + * memory, since the "mem_freed" amount is computed only + * across the dbAsyncDelete() call, while the thread can + * release the memory all the time. */ + if (server.lazyfree_lazy_eviction) { + if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) { + break; + } + } + + /* After some time, exit the loop early - even if memory limit + * hasn't been reached. If we suddenly need to free a lot of + * memory, don't want to spend too much time here. */ + if (elapsedUs(evictionTimer) > eviction_time_limit_us) { + // We still need to free memory - start eviction timer proc + if (!isEvictionProcRunning) { + isEvictionProcRunning = 1; + aeCreateTimeEvent(server.el, 0, + evictionTimeProc, NULL, NULL); + } + break; } } } else { goto cant_free; /* nothing to free... */ } } - result = C_OK; + /* at this point, the memory is OK, or we have reached the time limit */ + result = (isEvictionProcRunning) ? EVICT_RUNNING : EVICT_OK; cant_free: - /* We are here if we are not able to reclaim memory. There is only one - * last thing we can try: check if the lazyfree thread has jobs in queue - * and wait... */ - if (result != C_OK) { - latencyStartMonitor(lazyfree_latency); - while(bioPendingJobsOfType(BIO_LAZY_FREE)) { + if (result == EVICT_FAIL) { + /* At this point, we have run out of evictable items. It's possible + * that some items are being freed in the lazyfree thread. Perform a + * short wait here if such jobs exist, but don't wait long. */ + if (bioPendingJobsOfType(BIO_LAZY_FREE)) { + usleep(eviction_time_limit_us); if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) { - result = C_OK; - break; + result = EVICT_OK; } - usleep(1000); } - latencyEndMonitor(lazyfree_latency); - latencyAddSampleIfNeeded("eviction-lazyfree",lazyfree_latency); } + latencyEndMonitor(latency); latencyAddSampleIfNeeded("eviction-cycle",latency); return result; } -/* This is a wrapper for freeMemoryIfNeeded() that only really calls the - * function if right now there are the conditions to do so safely: - * - * - There must be no script in timeout condition. - * - Nor we are loading data right now. - * - */ -int freeMemoryIfNeededAndSafe(void) { - if (server.lua_timedout || server.loading) return C_OK; - return freeMemoryIfNeeded(); -} diff --git a/src/networking.c b/src/networking.c index cf40b69e4..8090e391d 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1830,8 +1830,8 @@ int processCommandAndResetClient(client *c) { } if (server.current_client == NULL) deadclient = 1; server.current_client = NULL; - /* freeMemoryIfNeeded may flush slave output buffers. This may - * result into a slave, that may be the active client, to be + /* performEvictions may flush slave output buffers. This may + * result in a slave, that may be the active client, to be * freed. */ return deadclient ? C_ERR : C_OK; } @@ -2797,7 +2797,7 @@ void asyncCloseClientOnOutputBufferLimitReached(client *c) { } } -/* Helper function used by freeMemoryIfNeeded() in order to flush slaves +/* Helper function used by performEvictions() in order to flush slaves * output buffers without returning control to the event loop. * This is also called by SHUTDOWN for a best-effort attempt to send * slaves the latest writes. */ diff --git a/src/server.c b/src/server.c index a5b2538fa..b0b12d59d 100644 --- a/src/server.c +++ b/src/server.c @@ -3681,9 +3681,9 @@ int processCommand(client *c) { * condition, to avoid mixing the propagation of scripts with the * propagation of DELs due to eviction. */ if (server.maxmemory && !server.lua_timedout) { - int out_of_memory = freeMemoryIfNeededAndSafe() == C_ERR; - /* freeMemoryIfNeeded may flush slave output buffers. This may result - * into a slave, that may be the active client, to be freed. */ + int out_of_memory = (performEvictions() == EVICT_FAIL); + /* performEvictions may flush slave output buffers. This may result + * in a slave, that may be the active client, to be freed. */ if (server.current_client == NULL) return C_ERR; int reject_cmd_on_oom = is_denyoom_command; diff --git a/src/server.h b/src/server.h index 8d26e6009..4122008dc 100644 --- a/src/server.h +++ b/src/server.h @@ -1358,6 +1358,7 @@ struct redisServer { unsigned long long maxmemory; /* Max number of memory bytes to use */ int maxmemory_policy; /* Policy for key eviction */ int maxmemory_samples; /* Precision of random sampling */ + int maxmemory_eviction_tenacity;/* Aggressiveness of eviction processing */ int lfu_log_factor; /* LFU logarithmic counter factor. */ int lfu_decay_time; /* LFU counter decay factor. */ long long proto_max_bulk_len; /* Protocol bulk length maximum size. */ @@ -1996,8 +1997,6 @@ int zslLexValueLteMax(sds value, zlexrangespec *spec); /* Core functions */ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level); size_t freeMemoryGetNotCountedMemory(); -int freeMemoryIfNeeded(void); -int freeMemoryIfNeededAndSafe(void); int processCommand(client *c); void setupSignalHandlers(void); void removeSignalHandlers(void); @@ -2234,6 +2233,11 @@ void evictionPoolAlloc(void); unsigned long LFUGetTimeInMinutes(void); uint8_t LFULogIncr(uint8_t value); unsigned long LFUDecrAndReturn(robj *o); +#define EVICT_OK 0 +#define EVICT_RUNNING 1 +#define EVICT_FAIL 2 +int performEvictions(void); + /* Keys hashing / comparison functions for dict.c hash tables. */ uint64_t dictSdsHash(const void *key); |