summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOran Agra <oran@redislabs.com>2022-11-09 10:02:18 +0200
committerOran Agra <oran@redislabs.com>2022-12-12 17:36:34 +0200
commit7c956d5cdfbd682d6b4a464a7dc55d8152cf6f2b (patch)
treed8d553deb9d5f45675caf3ef0c9edb2d97ca1d58
parent19d01b62ca985bf205d65b1003d39a44149d126d (diff)
downloadredis-7c956d5cdfbd682d6b4a464a7dc55d8152cf6f2b.tar.gz
diskless master, avoid bgsave child hung when fork parent crashes (#11463)
During a diskless sync, if the master main process crashes, the child would have hung in `write`. This fix closes the read fd on the child side, so that if the parent crashes, the child will get a write error and exit. This change also fixes disk-based replication, BGSAVE and AOFRW. In that case the child wouldn't have been hang, it would have just kept running until done which may be pointless. There is a certain degree of risk here. in case there's a BGSAVE child that could maybe succeed and the parent dies for some reason, the old code would have let the child keep running and maybe succeed and avoid data loss. On the other hand, if the parent is restarted, it would have loaded an old rdb file (or none), and then the child could reach the end and rename the rdb file (data conflicting with what the parent has), or also have a race with another BGSAVE child that the new parent started. Note that i removed a comment saying a write error will be ignored in the child and handled by the parent (this comment was very old and i don't think relevant). (cherry picked from commit ccaef5c923a14dc183c50530f52ada0fda012179)
-rw-r--r--src/childinfo.c4
-rw-r--r--src/rdb.c4
-rw-r--r--src/server.c4
-rw-r--r--tests/integration/replication.tcl41
-rw-r--r--tests/support/util.tcl8
5 files changed, 59 insertions, 2 deletions
diff --git a/src/childinfo.c b/src/childinfo.c
index 015987c84..e5184ff8b 100644
--- a/src/childinfo.c
+++ b/src/childinfo.c
@@ -112,7 +112,9 @@ void sendChildInfoGeneric(childInfoType info_type, size_t keys, double progress,
ssize_t wlen = sizeof(data);
if (write(server.child_info_pipe[1], &data, wlen) != wlen) {
- /* Nothing to do on error, this will be detected by the other side. */
+ /* Failed writing to parent, it could have been killed, exit. */
+ serverLog(LL_WARNING,"Child failed reporting info to parent, exiting. %s", strerror(errno));
+ exit(1);
}
}
diff --git a/src/rdb.c b/src/rdb.c
index b53fbdb21..7a6ef88b5 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -3418,6 +3418,10 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
rioInitWithFd(&rdb,rdb_pipe_write);
+ /* Close the reading part, so that if the parent crashes, the child will
+ * get a write error and exit. */
+ close(server.rdb_pipe_read);
+
redisSetProcTitle("redis-rdb-to-slaves");
redisSetCpuAffinity(server.bgsave_cpulist);
diff --git a/src/server.c b/src/server.c
index 7ace51087..ebeb358d9 100644
--- a/src/server.c
+++ b/src/server.c
@@ -6366,6 +6366,10 @@ int redisFork(int purpose) {
setOOMScoreAdj(CONFIG_OOM_BGCHILD);
dismissMemoryInChild();
closeChildUnusedResourceAfterFork();
+ /* Close the reading part, so that if the parent crashes, the child will
+ * get a write error and exit. */
+ if (server.child_info_pipe[0] != -1)
+ close(server.child_info_pipe[0]);
} else {
/* Parent */
if (childpid == -1) {
diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl
index d60c91918..53fb44269 100644
--- a/tests/integration/replication.tcl
+++ b/tests/integration/replication.tcl
@@ -992,7 +992,7 @@ test "diskless replication child being killed is collected" {
# wait for the replicas to start reading the rdb
wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 800 10
- # wait to be sure the eplica is hung and the master is blocked on write
+ # wait to be sure the replica is hung and the master is blocked on write
after 500
# simulate the OOM killer or anyone else kills the child
@@ -1012,6 +1012,45 @@ test "diskless replication child being killed is collected" {
}
} {} {external:skip}
+foreach mdl {yes no} {
+ test "replication dies when parent is killed - diskless: $mdl" {
+ # when master is killed, make sure the fork child can detect that and exit
+ start_server {tags {"repl"}} {
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+ set master_pid [srv 0 pid]
+ $master config set repl-diskless-sync $mdl
+ $master config set repl-diskless-sync-delay 0
+ # create keys that will take 10 seconds to save
+ $master config set rdb-key-save-delay 1000
+ $master debug populate 10000
+ start_server {} {
+ set replica [srv 0 client]
+ $replica replicaof $master_host $master_port
+
+ # wait for rdb child to start
+ wait_for_condition 5000 10 {
+ [s -1 rdb_bgsave_in_progress] == 1
+ } else {
+ fail "rdb child didn't start"
+ }
+ set fork_child_pid [get_child_pid -1]
+
+ # simulate the OOM killer or anyone else kills the parent
+ exec kill -9 $master_pid
+
+ # wait for the child to notice the parent died have exited
+ wait_for_condition 500 10 {
+ [process_is_alive $fork_child_pid] == 0
+ } else {
+ fail "rdb child didn't terminate"
+ }
+ }
+ }
+ } {} {external:skip}
+}
+
test "diskless replication read pipe cleanup" {
# In diskless replication, we create a read pipe for the RDB, between the child and the parent.
# When we close this pipe (fd), the read handler also needs to be removed from the event loop (if it still registered).
diff --git a/tests/support/util.tcl b/tests/support/util.tcl
index db21dea96..5300ab1c8 100644
--- a/tests/support/util.tcl
+++ b/tests/support/util.tcl
@@ -633,6 +633,14 @@ proc get_child_pid {idx} {
return $child_pid
}
+proc process_is_alive pid {
+ if {[catch {exec ps -p $pid} err]} {
+ return 0
+ } else {
+ return 1
+ }
+}
+
proc cmdrstat {cmd r} {
if {[regexp "\r\ncmdstat_$cmd:(.*?)\r\n" [$r info commandstats] _ value]} {
set _ $value