diff options
author | judeng <abc3844@126.com> | 2022-04-19 17:06:39 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-19 12:06:39 +0300 |
commit | d4cbd8140bb18ccec6af1561f6c2067f8c00a7b0 (patch) | |
tree | 4d4f348095e93dc9899b20b634ac4a8fd2ac0e6d /src | |
parent | 1a938046453e595cee9c4f559f099118210e0474 (diff) | |
download | redis-d4cbd8140bb18ccec6af1561f6c2067f8c00a7b0.tar.gz |
Fixes around AOF failed rewrite rate limiting (#10582)
Changes:
1. Check the failed-rewrite time threshold only when we are actually about to trigger a rewrite,
i.e. it should be the last condition tested, since the test has side effects (it increases the time threshold).
Previously, in some rare scenarios, the threshold could be increased even though no rewrite was going to be triggered.
2. Do not apply the limit in the startup state (e.g. after restarting a Redis instance whose rewrites previously failed and which has many incr files).
3. The “triggered the limit” log message is now recorded only when the limited status is returned.
4. Remove the failure count from the log message (it could be misleading in some cases).
Co-authored-by: chenyang8094 <chenyang8094@users.noreply.github.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/aof.c | 49 | ||||
-rw-r--r-- | src/server.c | 5 |
2 files changed, 27 insertions, 27 deletions
@@ -815,38 +815,39 @@ int openNewIncrAofForAppend(void) {
 #define AOF_REWRITE_LIMITE_THRESHOLD 3
 #define AOF_REWRITE_LIMITE_MAX_MINUTES 60 /* 1 hour */
 int aofRewriteLimited(void) {
-    int limit = 0;
-    static int limit_delay_minutes = 0;
+    static int next_delay_minutes = 0;
     static time_t next_rewrite_time = 0;
 
+    /* If the number of incr AOFs exceeds the threshold but server.aof_lastbgrewrite_status is OK, it
+     * means that redis may have just loaded a dataset containing many incr AOFs. At this time, we
+     * will not limit the AOFRW. */
     unsigned long incr_aof_num = listLength(server.aof_manifest->incr_aof_list);
-    if (incr_aof_num >= AOF_REWRITE_LIMITE_THRESHOLD) {
+    if (incr_aof_num < AOF_REWRITE_LIMITE_THRESHOLD || server.aof_lastbgrewrite_status == C_OK) {
+        /* We may be recovering from limited state, so reset all states. */
+        next_delay_minutes = 0;
+        next_rewrite_time = 0;
+        return 0;
+    }
+
+    /* if it is in the limiting state, then check if the next_rewrite_time is reached */
+    if (next_rewrite_time != 0) {
         if (server.unixtime < next_rewrite_time) {
-            limit = 1;
+            return 1;
         } else {
-            if (limit_delay_minutes == 0) {
-                limit = 1;
-                limit_delay_minutes = 1;
-            } else {
-                limit_delay_minutes *= 2;
-            }
-
-            if (limit_delay_minutes > AOF_REWRITE_LIMITE_MAX_MINUTES) {
-                limit_delay_minutes = AOF_REWRITE_LIMITE_MAX_MINUTES;
-            }
-
-            next_rewrite_time = server.unixtime + limit_delay_minutes * 60;
-
-            serverLog(LL_WARNING,
-                "Background AOF rewrite has repeatedly failed %ld times and triggered the limit, will retry in %d minutes",
-                incr_aof_num, limit_delay_minutes);
+            next_rewrite_time = 0;
+            return 0;
         }
-    } else {
-        limit_delay_minutes = 0;
-        next_rewrite_time = 0;
     }
 
-    return limit;
+    next_delay_minutes = (next_delay_minutes == 0) ? 1 : (next_delay_minutes * 2);
+    if (next_delay_minutes > AOF_REWRITE_LIMITE_MAX_MINUTES) {
+        next_delay_minutes = AOF_REWRITE_LIMITE_MAX_MINUTES;
+    }
+
+    next_rewrite_time = server.unixtime + next_delay_minutes * 60;
+    serverLog(LL_WARNING,
+        "Background AOF rewrite has repeatedly failed and triggered the limit, will retry in %d minutes", next_delay_minutes);
+    return 1;
 }
 
 /* ----------------------------------------------------------------------------
diff --git a/src/server.c b/src/server.c
index 8c073684a..f632279fa 100644
--- a/src/server.c
+++ b/src/server.c
@@ -1296,13 +1296,12 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
         if (server.aof_state == AOF_ON &&
             !hasActiveChildProcess() &&
             server.aof_rewrite_perc &&
-            server.aof_current_size > server.aof_rewrite_min_size &&
-            !aofRewriteLimited())
+            server.aof_current_size > server.aof_rewrite_min_size)
         {
             long long base = server.aof_rewrite_base_size ?
                 server.aof_rewrite_base_size : 1;
             long long growth = (server.aof_current_size*100/base) - 100;
-            if (growth >= server.aof_rewrite_perc) {
+            if (growth >= server.aof_rewrite_perc && !aofRewriteLimited()) {
                 serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
                 rewriteAppendOnlyFileBackground();
             }