summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBinbin <binloveplay1314@qq.com>2023-03-29 20:17:05 +0800
committerGitHub <noreply@github.com>2023-03-29 15:17:05 +0300
commitcb1717865804fdb0561e728d2f3a0a1138099d9d (patch)
treeecd347bc55b44b5891a3ae4fd23fb8a2a41dffe5 /src
parent557ca05d059951f618aab5fcba727fa19ecad729 (diff)
downloadredis-cb1717865804fdb0561e728d2f3a0a1138099d9d.tar.gz
Fix fork done handler wrongly update fsync metrics and enhance AOF_ FSYNC_ALWAYS (#11973)
This PR fix several unrelated bugs that were discovered by the same set of tests (WAITAOF tests in #11713), could make the `WAITAOF` test hang. The change in `backgroundRewriteDoneHandler` is about MP-AOF. That leftover / old code assumes that we started a new AOF file just now (when we have a new base into which we're gonna incrementally write), but the fact is that with MP-AOF, the fork done handler doesn't really affect the incremental file being maintained by the parent process, there's no reason to re-issue `SELECT`, and no reason to update any of the fsync variables in that flow. This should have been deleted with MP-AOF (introduced in #9788, 7.0). The damage is that the update to `aof_fsync_offset` will cause us to miss an fsync in `flushAppendOnlyFile`, that happens if we stop write commands in `AOF_FSYNC_EVERYSEC` while an AOFRW is in progress. This caused a new `WAITAOF` test to sometime hang forever. Also because of MP-AOF, we needed to change `aof_fsync_offset` to `aof_last_incr_fsync_offset` and match it to `aof_last_incr_size` in `flushAppendOnlyFile`. This is because in the past we compared `aof_fsync_offset` and `aof_current_size`, but with MP-AOF it could be the total AOF file will be smaller after AOFRW, and the (already existing) incr file still has data that needs to be fsynced. The change in `flushAppendOnlyFile`, about the `AOF_FSYNC_ALWAYS`, it is follow #6053 (the details is in #5985), we also check `AOF_FSYNC_ALWAYS` to handle a case where appendfsync is changed from everysec to always while there is data that's written but not yet fsynced.
Diffstat (limited to 'src')
-rw-r--r--src/aof.c31
-rw-r--r--src/server.c1
-rw-r--r--src/server.h3
3 files changed, 22 insertions, 13 deletions
diff --git a/src/aof.c b/src/aof.c
index 12bd74376..55bca6e7a 100644
--- a/src/aof.c
+++ b/src/aof.c
@@ -758,6 +758,7 @@ void aofOpenIfNeededOnServerStart(void) {
}
server.aof_last_incr_size = getAppendOnlyFileSize(aof_name, NULL);
+ server.aof_last_incr_fsync_offset = server.aof_last_incr_size;
if (incr_aof_len) {
serverLog(LL_NOTICE, "Opening AOF incr file %s on server start", aof_name);
@@ -832,13 +833,14 @@ int openNewIncrAofForAppend(void) {
* is already synced at this point so fsync doesn't matter. */
if (server.aof_fd != -1) {
aof_background_fsync_and_close(server.aof_fd);
- server.aof_fsync_offset = server.aof_current_size;
server.aof_last_fsync = server.unixtime;
}
server.aof_fd = newfd;
/* Reset the aof_last_incr_size. */
server.aof_last_incr_size = 0;
+ /* Reset the aof_last_incr_fsync_offset. */
+ server.aof_last_incr_fsync_offset = 0;
/* Update `server.aof_manifest`. */
if (temp_am) aofManifestFreeAndUpdate(temp_am);
return C_OK;
@@ -952,7 +954,6 @@ void stopAppendOnly(void) {
if (redis_fsync(server.aof_fd) == -1) {
serverLog(LL_WARNING,"Fail to fsync the AOF file: %s",strerror(errno));
} else {
- server.aof_fsync_offset = server.aof_current_size;
server.aof_last_fsync = server.unixtime;
}
close(server.aof_fd);
@@ -962,6 +963,7 @@ void stopAppendOnly(void) {
server.aof_state = AOF_OFF;
server.aof_rewrite_scheduled = 0;
server.aof_last_incr_size = 0;
+ server.aof_last_incr_fsync_offset = 0;
server.fsynced_reploff = -1;
atomicSet(server.fsynced_reploff_pending, 0);
killAppendOnlyChild();
@@ -1083,10 +1085,19 @@ void flushAppendOnlyFile(int force) {
* stop write commands before fsync called in one second,
* the data in page cache cannot be flushed in time. */
if (server.aof_fsync == AOF_FSYNC_EVERYSEC &&
- server.aof_fsync_offset != server.aof_current_size &&
+ server.aof_last_incr_fsync_offset != server.aof_last_incr_size &&
server.unixtime > server.aof_last_fsync &&
!(sync_in_progress = aofFsyncInProgress())) {
goto try_fsync;
+
+ /* Check if we need to do fsync even the aof buffer is empty,
+ * the reason is described in the previous AOF_FSYNC_EVERYSEC block,
+ * and AOF_FSYNC_ALWAYS is also checked here to handle a case where
+ * aof_fsync is changed from everysec to always. */
+ } else if (server.aof_fsync == AOF_FSYNC_ALWAYS &&
+ server.aof_last_incr_fsync_offset != server.aof_last_incr_size)
+ {
+ goto try_fsync;
} else {
return;
}
@@ -1253,14 +1264,14 @@ try_fsync:
}
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("aof-fsync-always",latency);
- server.aof_fsync_offset = server.aof_current_size;
+ server.aof_last_incr_fsync_offset = server.aof_last_incr_size;
server.aof_last_fsync = server.unixtime;
atomicSet(server.fsynced_reploff_pending, server.master_repl_offset);
- } else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC &&
- server.unixtime > server.aof_last_fsync)) {
+ } else if (server.aof_fsync == AOF_FSYNC_EVERYSEC &&
+ server.unixtime > server.aof_last_fsync) {
if (!sync_in_progress) {
aof_background_fsync(server.aof_fd);
- server.aof_fsync_offset = server.aof_current_size;
+ server.aof_last_incr_fsync_offset = server.aof_last_incr_size;
}
server.aof_last_fsync = server.unixtime;
}
@@ -1766,7 +1777,6 @@ int loadAppendOnlyFiles(aofManifest *am) {
* executed early, but that shouldn't be a problem since everything will be
* fine after the first AOFRW. */
server.aof_rewrite_base_size = base_size;
- server.aof_fsync_offset = server.aof_current_size;
cleanup:
stopLoading(ret == AOF_OK || ret == AOF_TRUNCATED);
@@ -2666,13 +2676,10 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
/* We can safely let `server.aof_manifest` point to 'temp_am' and free the previous one. */
aofManifestFreeAndUpdate(temp_am);
- if (server.aof_fd != -1) {
+ if (server.aof_state != AOF_OFF) {
/* AOF enabled. */
- server.aof_selected_db = -1; /* Make sure SELECT is re-issued */
server.aof_current_size = getAppendOnlyFileSize(new_base_filename, NULL) + server.aof_last_incr_size;
server.aof_rewrite_base_size = server.aof_current_size;
- server.aof_fsync_offset = server.aof_current_size;
- server.aof_last_fsync = server.unixtime;
}
/* We don't care about the return value of `aofDelHistoryFiles`, because the history
diff --git a/src/server.c b/src/server.c
index 4e77d2f6d..4855ee1a8 100644
--- a/src/server.c
+++ b/src/server.c
@@ -2018,6 +2018,7 @@ void initServerConfig(void) {
server.aof_selected_db = -1; /* Make sure the first time will not match */
server.aof_flush_postponed_start = 0;
server.aof_last_incr_size = 0;
+ server.aof_last_incr_fsync_offset = 0;
server.active_defrag_running = 0;
server.notify_keyspace_events = 0;
server.blocked_clients = 0;
diff --git a/src/server.h b/src/server.h
index 1a68071f1..43d85008a 100644
--- a/src/server.h
+++ b/src/server.h
@@ -1742,7 +1742,8 @@ struct redisServer {
off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */
off_t aof_current_size; /* AOF current size (Including BASE + INCRs). */
off_t aof_last_incr_size; /* The size of the latest incr AOF. */
- off_t aof_fsync_offset; /* AOF offset which is already synced to disk. */
+ off_t aof_last_incr_fsync_offset; /* AOF offset which is already requested to be synced to disk.
+ * Compare with the aof_last_incr_size. */
int aof_flush_sleep; /* Micros to sleep before flush. (used by tests) */
int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */
sds aof_buf; /* AOF buffer, written before entering the event loop */