diff options
author | Xuerui Fa <xuerui.fa@mongodb.com> | 2020-05-21 10:07:26 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-05-21 16:00:54 +0000 |
commit | 300d06e03402aae1240bad4582ad30ea4917e35f (patch) | |
tree | 9c3eeb4fa3680a85798c9b3a0198304fe7095a52 /src | |
parent | cd6190cea6d5fb8ec5def30a4efc9339216ebc59 (diff) | |
download | mongo-300d06e03402aae1240bad4582ad30ea4917e35f.tar.gz |
SERVER-47451: Skip re-evaluating sync source if node does not have enough ping stats from other nodes in replica set
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/repl/topology_coordinator.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator_v1_test.cpp | 58 |
2 files changed, 62 insertions, 10 deletions
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp index ec94c551cce..2ce302813b9 100644 --- a/src/mongo/db/repl/topology_coordinator.cpp +++ b/src/mongo/db/repl/topology_coordinator.cpp @@ -553,15 +553,15 @@ boost::optional<HostAndPort> TopologyCoordinator::_chooseSyncSourceInitialStep(D } // wait for 2N pings (not counting ourselves) before choosing a sync target - int needMorePings = (_memberData.size() - 1) * 2 - pingsInConfig; + int numPingsNeeded = (_memberData.size() - 1) * 2 - pingsInConfig; - if (needMorePings > 0) { + if (numPingsNeeded > 0) { static Occasionally sampler; if (sampler.tick()) { LOGV2(21783, "waiting for {pingsNeeded} pings from other members before syncing", "Waiting for pings from other members before syncing", - "pingsNeeded"_attr = needMorePings); + "pingsNeeded"_attr = numPingsNeeded); } return HostAndPort(); } @@ -3073,6 +3073,14 @@ bool TopologyCoordinator::shouldChangeSyncSourceDueToPingTime(const HostAndPort& return false; } + // If we have not yet received 5N pings (not counting ourselves), do not re-evaluate our sync + // source. + int numPingsNeeded = (_memberData.size() - 1) * 5 - pingsInConfig; + if (numPingsNeeded > 0) { + return false; + } + + if (_pings.count(currentSource) == 0) { // Ping data for our current sync source could not be found. return false; diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp index 51b410a355d..8b06fc0e0c3 100644 --- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp @@ -4127,13 +4127,16 @@ public: changeSyncSourceThresholdMillis.store(5LL); // Receive an up heartbeat from both sync sources. This will allow 'isEligibleSyncSource()' - // to pass if the node reaches that check. - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, syncSourceOpTime); - ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, syncSourceOpTime); - ASSERT_NO_ACTION(nextAction.getAction()); + // to pass if the node reaches that check. We repeat this 5 times to satisfy that we have + // received at least 5N heartbeats before re-evaluating our sync source. + for (auto i = 0; i < 5; i++) { + HeartbeatResponseAction nextAction = receiveUpHeartbeat( + HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, syncSourceOpTime); + ASSERT_NO_ACTION(nextAction.getAction()); + nextAction = receiveUpHeartbeat( + HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, syncSourceOpTime); + ASSERT_NO_ACTION(nextAction.getAction()); + } } const OpTime election = OpTime(Timestamp(1, 0), 0); @@ -4461,6 +4464,47 @@ TEST_F(ReevalSyncSourceTest, ChangeWhenHaveNotChangedTooManyTimesRecently) { ReadPreference::Nearest)); } +TEST_F(TopoCoordTest, DontChangeWhenNodeRequiresMorePings) { + updateConfig(BSON("_id" + << "rs0" + << "version" << 5 << "term" << 1 << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "host1:27017") + << BSON("_id" << 1 << "host" + << "host2:27017") + << BSON("_id" << 2 << "host" + << "host3:27017")) + << "protocolVersion" << 1), + 0); + // Set 'changeSyncSourceThresholdMillis' to a non-zero value to allow evaluating if the node + // should change sync sources due to ping time. + changeSyncSourceThresholdMillis.store(5LL); + + auto election = OpTime(Timestamp(1, 0), 0); + auto syncSourceOpTime = OpTime(Timestamp(4, 0), 0); + // Set lastOpTimeFetched to be before the sync source's OpTime. + auto lastFetched = OpTime(Timestamp(3, 0), 0); + + // Send fewer heartbeats than the required number to do a re-eval due to ping time. + for (auto i = 0; i < 3; i++) { + HeartbeatResponseAction nextAction = receiveUpHeartbeat( + HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, syncSourceOpTime); + ASSERT_NO_ACTION(nextAction.getAction()); + nextAction = receiveUpHeartbeat( + HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, syncSourceOpTime); + ASSERT_NO_ACTION(nextAction.getAction()); + } + + getTopoCoord().setPing_forTest(HostAndPort("host2"), Milliseconds(10)); + getTopoCoord().setPing_forTest(HostAndPort("host3"), Milliseconds(1)); + + ASSERT_FALSE(getTopoCoord().shouldChangeSyncSourceDueToPingTime(HostAndPort("host2"), + MemberState::RS_SECONDARY, + lastFetched, + now(), + ReadPreference::Nearest)); +} + class HeartbeatResponseReconfigTestV1 : public TopoCoordTest { public: virtual void setUp() { |