From b0dd7b324505ebb3b6fac3f3067eba948cf0b09d Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Sat, 6 May 2023 11:53:28 +0800 Subject: Free backlog only if rsi is invalid when master reboot (#12088) When the master reboots from an RDB file and the rsi in the RDB is valid, we should not free the replication backlog, even if master_repl_offset or repl-offset is 0. If the master has not sent any data to replicas, master_repl_offset is 0, which is a valid value. A clear example: 1. start a master and apply some write commands; the master's master_repl_offset is 0 since it has no replicas. 2. stop write commands on the master, start another instance and make it replicaof the master, triggering a FULLRESYNC 3. the master's master_repl_offset is still 0 (set a large number for repl-ping-replica-period); do BGSAVE and restart the master 4. the master loads master_repl_offset from the RDB's rsi and it's still 0, and we should make sure the replica can partially resync with the master. --- src/server.c | 4 ++- tests/integration/psync2-master-restart.tcl | 41 ++++++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/server.c b/src/server.c index 381edabc6..b6ed1d9c5 100644 --- a/src/server.c +++ b/src/server.c @@ -6700,6 +6700,7 @@ void loadDataFromDisk(void) { serverLog(LL_NOTICE, "DB loaded from append only file: %.3f seconds", (float)(ustime()-start)/1000000); } else { rdbSaveInfo rsi = RDB_SAVE_INFO_INIT; + int rsi_is_valid = 0; errno = 0; /* Prevent a stale value from affecting error checking */ int rdb_flags = RDBFLAGS_NONE; if (iAmMaster()) { @@ -6721,6 +6722,7 @@ void loadDataFromDisk(void) { * information in function rdbPopulateSaveInfo.
*/ rsi.repl_stream_db != -1) { + rsi_is_valid = 1; if (!iAmMaster()) { memcpy(server.replid,rsi.repl_id,sizeof(server.replid)); server.master_repl_offset = rsi.repl_offset; @@ -6754,7 +6756,7 @@ void loadDataFromDisk(void) { * if RDB doesn't have replication info or there is no rdb, it is not * possible to support partial resynchronization, to avoid extra memory * of replication backlog, we drop it. */ - if (server.master_repl_offset == 0 && server.repl_backlog) + if (!rsi_is_valid && server.repl_backlog) freeReplicationBacklog(); } } diff --git a/tests/integration/psync2-master-restart.tcl b/tests/integration/psync2-master-restart.tcl index e925aaab8..a9e21d12d 100644 --- a/tests/integration/psync2-master-restart.tcl +++ b/tests/integration/psync2-master-restart.tcl @@ -21,6 +21,10 @@ start_server {} { $replica config set repl-timeout 3600 $sub_replica config set repl-timeout 3600 + # Avoid PINGs + $master config set repl-ping-replica-period 3600 + $master config rewrite + # Build replication chain $replica replicaof $master_host $master_port $sub_replica replicaof $replica_host $replica_port @@ -32,14 +36,43 @@ start_server {} { fail "Replication not started." 
} - # Avoid PINGs - $master config set repl-ping-replica-period 3600 - $master config rewrite + test "PSYNC2: Partial resync after Master restart using RDB aux fields when offset is 0" { + assert {[status $master master_repl_offset] == 0} + + set replid [status $master master_replid] + $replica config resetstat + + catch { + restart_server 0 true false true now + set master [srv 0 client] + } + wait_for_condition 50 1000 { + [status $replica master_link_status] eq {up} && + [status $sub_replica master_link_status] eq {up} + } else { + fail "Replicas didn't sync after master restart" + } + + # Make sure master restore replication info correctly + assert {[status $master master_replid] != $replid} + assert {[status $master master_repl_offset] == 0} + assert {[status $master master_replid2] eq $replid} + assert {[status $master second_repl_offset] == 1} + + # Make sure master set replication backlog correctly + assert {[status $master repl_backlog_active] == 1} + assert {[status $master repl_backlog_first_byte_offset] == 1} + assert {[status $master repl_backlog_histlen] == 0} + + # Partial resync after Master restart + assert {[status $master sync_partial_ok] == 1} + assert {[status $replica sync_partial_ok] == 1} + } # Generate some data createComplexDataset $master 1000 - test "PSYNC2: Partial resync after Master restart using RDB aux fields" { + test "PSYNC2: Partial resync after Master restart using RDB aux fields with data" { wait_for_condition 500 100 { [status $master master_repl_offset] == [status $replica master_repl_offset] && [status $master master_repl_offset] == [status $sub_replica master_repl_offset] -- cgit v1.2.1