From cb1717865804fdb0561e728d2f3a0a1138099d9d Mon Sep 17 00:00:00 2001 From: Binbin Date: Wed, 29 Mar 2023 20:17:05 +0800 Subject: Fix fork done handler wrongly update fsync metrics and enhance AOF_ FSYNC_ALWAYS (#11973) This PR fix several unrelated bugs that were discovered by the same set of tests (WAITAOF tests in #11713), could make the `WAITAOF` test hang. The change in `backgroundRewriteDoneHandler` is about MP-AOF. That leftover / old code assumes that we started a new AOF file just now (when we have a new base into which we're gonna incrementally write), but the fact is that with MP-AOF, the fork done handler doesn't really affect the incremental file being maintained by the parent process, there's no reason to re-issue `SELECT`, and no reason to update any of the fsync variables in that flow. This should have been deleted with MP-AOF (introduced in #9788, 7.0). The damage is that the update to `aof_fsync_offset` will cause us to miss an fsync in `flushAppendOnlyFile`, that happens if we stop write commands in `AOF_FSYNC_EVERYSEC` while an AOFRW is in progress. This caused a new `WAITAOF` test to sometime hang forever. Also because of MP-AOF, we needed to change `aof_fsync_offset` to `aof_last_incr_fsync_offset` and match it to `aof_last_incr_size` in `flushAppendOnlyFile`. This is because in the past we compared `aof_fsync_offset` and `aof_current_size`, but with MP-AOF it could be the total AOF file will be smaller after AOFRW, and the (already existing) incr file still has data that needs to be fsynced. The change in `flushAppendOnlyFile`, about the `AOF_FSYNC_ALWAYS`, it is follow #6053 (the details is in #5985), we also check `AOF_FSYNC_ALWAYS` to handle a case where appendfsync is changed from everysec to always while there is data that's written but not yet fsynced. --- src/aof.c | 31 +++++++++++++++++++------------ src/server.c | 1 + src/server.h | 3 ++- 3 files changed, 22 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/aof.c b/src/aof.c index 12bd74376..55bca6e7a 100644 --- a/src/aof.c +++ b/src/aof.c @@ -758,6 +758,7 @@ void aofOpenIfNeededOnServerStart(void) { } server.aof_last_incr_size = getAppendOnlyFileSize(aof_name, NULL); + server.aof_last_incr_fsync_offset = server.aof_last_incr_size; if (incr_aof_len) { serverLog(LL_NOTICE, "Opening AOF incr file %s on server start", aof_name); @@ -832,13 +833,14 @@ int openNewIncrAofForAppend(void) { * is already synced at this point so fsync doesn't matter. */ if (server.aof_fd != -1) { aof_background_fsync_and_close(server.aof_fd); - server.aof_fsync_offset = server.aof_current_size; server.aof_last_fsync = server.unixtime; } server.aof_fd = newfd; /* Reset the aof_last_incr_size. */ server.aof_last_incr_size = 0; + /* Reset the aof_last_incr_fsync_offset. */ + server.aof_last_incr_fsync_offset = 0; /* Update `server.aof_manifest`. */ if (temp_am) aofManifestFreeAndUpdate(temp_am); return C_OK; @@ -952,7 +954,6 @@ void stopAppendOnly(void) { if (redis_fsync(server.aof_fd) == -1) { serverLog(LL_WARNING,"Fail to fsync the AOF file: %s",strerror(errno)); } else { - server.aof_fsync_offset = server.aof_current_size; server.aof_last_fsync = server.unixtime; } close(server.aof_fd); @@ -962,6 +963,7 @@ void stopAppendOnly(void) { server.aof_state = AOF_OFF; server.aof_rewrite_scheduled = 0; server.aof_last_incr_size = 0; + server.aof_last_incr_fsync_offset = 0; server.fsynced_reploff = -1; atomicSet(server.fsynced_reploff_pending, 0); killAppendOnlyChild(); @@ -1083,10 +1085,19 @@ void flushAppendOnlyFile(int force) { * stop write commands before fsync called in one second, * the data in page cache cannot be flushed in time. */ if (server.aof_fsync == AOF_FSYNC_EVERYSEC && - server.aof_fsync_offset != server.aof_current_size && + server.aof_last_incr_fsync_offset != server.aof_last_incr_size && server.unixtime > server.aof_last_fsync && !(sync_in_progress = aofFsyncInProgress())) { goto try_fsync; + + /* Check if we need to do fsync even the aof buffer is empty, + * the reason is described in the previous AOF_FSYNC_EVERYSEC block, + * and AOF_FSYNC_ALWAYS is also checked here to handle a case where + * aof_fsync is changed from everysec to always. */ + } else if (server.aof_fsync == AOF_FSYNC_ALWAYS && + server.aof_last_incr_fsync_offset != server.aof_last_incr_size) + { + goto try_fsync; } else { return; } @@ -1253,14 +1264,14 @@ try_fsync: } latencyEndMonitor(latency); latencyAddSampleIfNeeded("aof-fsync-always",latency); - server.aof_fsync_offset = server.aof_current_size; + server.aof_last_incr_fsync_offset = server.aof_last_incr_size; server.aof_last_fsync = server.unixtime; atomicSet(server.fsynced_reploff_pending, server.master_repl_offset); - } else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC && - server.unixtime > server.aof_last_fsync)) { + } else if (server.aof_fsync == AOF_FSYNC_EVERYSEC && + server.unixtime > server.aof_last_fsync) { if (!sync_in_progress) { aof_background_fsync(server.aof_fd); - server.aof_fsync_offset = server.aof_current_size; + server.aof_last_incr_fsync_offset = server.aof_last_incr_size; } server.aof_last_fsync = server.unixtime; } @@ -1766,7 +1777,6 @@ int loadAppendOnlyFiles(aofManifest *am) { * executed early, but that shouldn't be a problem since everything will be * fine after the first AOFRW. */ server.aof_rewrite_base_size = base_size; - server.aof_fsync_offset = server.aof_current_size; cleanup: stopLoading(ret == AOF_OK || ret == AOF_TRUNCATED); @@ -2666,13 +2676,10 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) { /* We can safely let `server.aof_manifest` point to 'temp_am' and free the previous one. */ aofManifestFreeAndUpdate(temp_am); - if (server.aof_fd != -1) { + if (server.aof_state != AOF_OFF) { /* AOF enabled. */ - server.aof_selected_db = -1; /* Make sure SELECT is re-issued */ server.aof_current_size = getAppendOnlyFileSize(new_base_filename, NULL) + server.aof_last_incr_size; server.aof_rewrite_base_size = server.aof_current_size; - server.aof_fsync_offset = server.aof_current_size; - server.aof_last_fsync = server.unixtime; } /* We don't care about the return value of `aofDelHistoryFiles`, because the history diff --git a/src/server.c b/src/server.c index 4e77d2f6d..4855ee1a8 100644 --- a/src/server.c +++ b/src/server.c @@ -2018,6 +2018,7 @@ void initServerConfig(void) { server.aof_selected_db = -1; /* Make sure the first time will not match */ server.aof_flush_postponed_start = 0; server.aof_last_incr_size = 0; + server.aof_last_incr_fsync_offset = 0; server.active_defrag_running = 0; server.notify_keyspace_events = 0; server.blocked_clients = 0; diff --git a/src/server.h b/src/server.h index 1a68071f1..43d85008a 100644 --- a/src/server.h +++ b/src/server.h @@ -1742,7 +1742,8 @@ struct redisServer { off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */ off_t aof_current_size; /* AOF current size (Including BASE + INCRs). */ off_t aof_last_incr_size; /* The size of the latest incr AOF. */ - off_t aof_fsync_offset; /* AOF offset which is already synced to disk. */ + off_t aof_last_incr_fsync_offset; /* AOF offset which is already requested to be synced to disk. + * Compare with the aof_last_incr_size. */ int aof_flush_sleep; /* Micros to sleep before flush. (used by tests) */ int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */ sds aof_buf; /* AOF buffer, written before entering the event loop */ -- cgit v1.2.1