summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGabriel Marks <gabriel.marks@mongodb.com>2022-01-06 19:26:05 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-01-06 19:55:22 +0000
commita7bf9b5c81e43595d3548b000f5910852eefe6b3 (patch)
tree94c90ff8b79dcefec6269631e571f370363a415d
parenta9bec8a996b1d8fd6d3e28b200d8483f9a944bcb (diff)
downloadmongo-a7bf9b5c81e43595d3548b000f5910852eefe6b3.tar.gz
SERVER-59290 Improve criteria for shouldChangeSyncSource
-rw-r--r--jstests/replsets/initial_sync_chooses_correct_sync_source.js6
-rw-r--r--jstests/replsets/libs/sync_source.js11
-rw-r--r--jstests/replsets/sync_source_changes.js73
-rw-r--r--src/mongo/db/repl/topology_coordinator.cpp114
-rw-r--r--src/mongo/db/repl/topology_coordinator.h3
5 files changed, 165 insertions, 42 deletions
diff --git a/jstests/replsets/initial_sync_chooses_correct_sync_source.js b/jstests/replsets/initial_sync_chooses_correct_sync_source.js
index 6e61d6dc123..bc44457e84e 100644
--- a/jstests/replsets/initial_sync_chooses_correct_sync_source.js
+++ b/jstests/replsets/initial_sync_chooses_correct_sync_source.js
@@ -9,6 +9,7 @@
"use strict";
load("jstests/libs/fail_point_util.js");
+load("jstests/replsets/libs/sync_source.js"); // assertSyncSourceMatchesSoon
const waitForHeartbeats = initialSyncNode => {
// Hang the node before it undergoes sync source selection.
@@ -173,10 +174,7 @@ initialSyncNode.adminCommand(
// Once we become secondary, the secondary read preference no longer matters and we choose the
// primary because chaining is disallowed.
-assert.soon(function() {
- let res = assert.commandWorked(initialSyncNode.adminCommand({replSetGetStatus: 1}));
- return res.syncSourceHost == primary.host;
-});
+assertSyncSourceMatchesSoon(initialSyncNode, primary.host);
primary.delayMessagesFrom(initialSyncNode, 0);
TestData.skipCollectionAndIndexValidation = false;
diff --git a/jstests/replsets/libs/sync_source.js b/jstests/replsets/libs/sync_source.js
index 37483912cfe..41be60ec132 100644
--- a/jstests/replsets/libs/sync_source.js
+++ b/jstests/replsets/libs/sync_source.js
@@ -64,6 +64,17 @@ const forceSyncSource = (rst, node, syncSource) => {
return forceSyncSource;
};
+/**
+ * Asserts that the sync source of the given node will match syncSourceName soon. Additional
+ * arguments are passed to the assert.soon.
+ */
+const assertSyncSourceMatchesSoon = (node, syncSourceName, ...assertSoonArgs) => {
+ return assert.soon(() => {
+ const res = assert.commandWorked(node.adminCommand({replSetGetStatus: 1}));
+ return res.syncSourceHost === syncSourceName;
+ }, ...assertSoonArgs);
+};
+
const DataCenter = class {
constructor(name, nodes) {
this.name = name;
diff --git a/jstests/replsets/sync_source_changes.js b/jstests/replsets/sync_source_changes.js
new file mode 100644
index 00000000000..80655c06667
--- /dev/null
+++ b/jstests/replsets/sync_source_changes.js
@@ -0,0 +1,73 @@
+/**
+ * Tests that when the current sync source no longer meets the strict criteria for being a sync
+ * source, and there is another node which does meet those criteria, we will change sync source
+ * (eventually).
+ */
+
+(function() {
+"use strict";
+
+load("jstests/replsets/rslib.js"); // reconfig
+load("jstests/replsets/libs/sync_source.js"); // assertSyncSourceMatchesSoon
+
+// Start RST with only one voting node, node 0 -- this will be the only valid voting node and sync
+// source
+const rst = new ReplSetTest({nodes: [{}, {rsConfig: {priority: 0, votes: 0}}]});
+rst.startSet();
+rst.initiate();
+
+// Make sure that node 0 is primary as expected
+const primary = rst.getPrimary();
+assert.eq(primary, rst.nodes[0]);
+
+// Add a new voting node, node 2 -- voting nodes will choose voting nodes as sync sources.
+const newNode = rst.add({});
+rst.reInitiate();
+rst.waitForState(newNode, ReplSetTest.State.SECONDARY);
+rst.awaitReplication();
+rst.awaitSecondaryNodes();
+
+// Assure that node 2 will set node 0 as its sync source, since it is the best option.
+assertSyncSourceMatchesSoon(newNode, rst.nodes[0].host);
+
+// Make node 1 a voter so that it will be a valid option for sync source
+let cfg = rst.getReplSetConfigFromNode();
+cfg.members[1].priority = 1;
+cfg.members[1].votes = 1;
+reconfig(rst, cfg);
+
+// Force a stepup of node 1 -- we need to step node 0 down so that we can set it as a non-voter
+// without causing errors.
+rst.stepUp(rst.nodes[1]);
+
+// Make node 0 a nonvoter so that it will be an invalid option for sync source
+cfg = rst.getReplSetConfigFromNode();
+cfg.members[0].priority = 0;
+cfg.members[0].votes = 0;
+reconfig(rst, cfg);
+
+// Run this repeatedly, as sometimes the stop, insert, restart won't cause the sync source to be
+// switched correctly due to transient issues with the sync source we want to switch to.
+assert.soon(() => {
+ // Insert a document while newNode is not replicating to force it to run shouldChangeSyncSource
+ stopServerReplication(newNode);
+ assert.commandWorked(
+ rst.getPrimary().getDB("testSyncSourceChangesDb").getCollection("coll").insert({a: 1}, {
+ writeConcern: {w: 1}
+ }));
+ restartServerReplication(newNode);
+ try {
+ assertSyncSourceMatchesSoon(newNode,
+ cfg.members[1].host,
+ undefined /* msg */,
+ 5 * 1000 /* timeout */,
+ undefined /* interval */,
+ {runHangAnalyzer: false});
+ return true;
+ } catch (e) {
+ return false;
+ }
+});
+
+rst.stopSet();
+})();
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index ddc79002521..8c9430b9ed8 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -329,7 +329,8 @@ HostAndPort TopologyCoordinator::_chooseNearbySyncSource(Date_t now,
now,
lastOpTimeFetched,
readPreference,
- attempts == 0 /* firstAttempt */)) {
+ attempts == 0 /* firstAttempt */,
+ true /* shouldCheckStaleness */)) {
// Node is not a viable sync source candidate.
continue;
}
@@ -406,7 +407,8 @@ bool TopologyCoordinator::_isEligibleSyncSource(int candidateIndex,
Date_t now,
const OpTime& lastOpTimeFetched,
ReadPreference readPreference,
- const bool firstAttempt) const {
+ const bool firstAttempt,
+ const bool shouldCheckStaleness) const {
// Don't consider ourselves.
if (candidateIndex == _selfIndex) {
return false;
@@ -464,16 +466,19 @@ bool TopologyCoordinator::_isEligibleSyncSource(int candidateIndex,
"syncSourceCandidate"_attr = syncSourceCandidate);
return false;
}
- // Candidates cannot be excessively behind.
- const auto oldestSyncOpTime = _getOldestSyncOpTime();
- if (memberData.getHeartbeatAppliedOpTime() < oldestSyncOpTime) {
- LOGV2_DEBUG(3873110,
- 2,
- "Cannot select sync source because it is too far behind",
- "syncSourceCandidate"_attr = syncSourceCandidate,
- "syncSourceCandidateOpTime"_attr = memberData.getHeartbeatAppliedOpTime(),
- "oldestAcceptableOpTime"_attr = oldestSyncOpTime);
- return false;
+ // Candidates cannot be excessively behind, if we are checking for staleness.
+ if (shouldCheckStaleness) {
+ const auto oldestSyncOpTime = _getOldestSyncOpTime();
+ if (memberData.getHeartbeatAppliedOpTime() < oldestSyncOpTime) {
+ LOGV2_DEBUG(3873110,
+ 2,
+ "Cannot select sync source because it is too far behind",
+ "syncSourceCandidate"_attr = syncSourceCandidate,
+ "syncSourceCandidateOpTime"_attr =
+ memberData.getHeartbeatAppliedOpTime(),
+ "oldestAcceptableOpTime"_attr = oldestSyncOpTime);
+ return false;
+ }
}
// Candidate must not have a configured delay larger than ours.
if (_selfConfig().getSecondaryDelay() < memberConfig.getSecondaryDelay()) {
@@ -497,8 +502,8 @@ bool TopologyCoordinator::_isEligibleSyncSource(int candidateIndex,
return false;
}
}
- // Only select a candidate that is ahead of me.
- if (memberData.getHeartbeatAppliedOpTime() <= lastOpTimeFetched) {
+ // Only select a candidate that is ahead of me, if we are checking for staleness.
+ if (shouldCheckStaleness && memberData.getHeartbeatAppliedOpTime() <= lastOpTimeFetched) {
LOGV2_DEBUG(3873113,
1,
"Cannot select sync source which is not ahead of me",
@@ -3110,40 +3115,74 @@ bool TopologyCoordinator::shouldChangeSyncSource(const HostAndPort& currentSourc
if (MONGO_unlikely(disableMaxSyncSourceLagSecs.shouldFail())) {
LOGV2(
21833,
- "disableMaxSyncSourceLagSecs fail point enabled - not checking the most recent "
- "OpTime, {currentSyncSourceOpTime}, of our current sync source, {syncSource}, against "
- "the OpTimes of the other nodes in this replica set.",
"disableMaxSyncSourceLagSecs fail point enabled - not checking the most recent OpTime "
"of our current sync source against the OpTimes of the other nodes in this replica set",
"currentSyncSourceOpTime"_attr = currentSourceOpTime.toString(),
"syncSource"_attr = currentSource);
} else {
- unsigned int currentSecs = currentSourceOpTime.getSecs();
- unsigned int goalSecs = currentSecs + durationCount<Seconds>(_options.maxSyncSourceLagSecs);
-
- for (std::vector<MemberData>::const_iterator it = _memberData.begin();
- it != _memberData.end();
- ++it) {
- const int itIndex = indexOfIterator(_memberData, it);
- const MemberConfig& candidateConfig = _rsConfig.getMemberAt(itIndex);
- if (it->up() && (candidateConfig.isVoter() || !_selfConfig().isVoter()) &&
- (candidateConfig.shouldBuildIndexes() || !_selfConfig().shouldBuildIndexes()) &&
- it->getState().readable() && !_memberIsDenylisted(candidateConfig, now) &&
- goalSecs < it->getHeartbeatAppliedOpTime().getSecs()) {
+ unsigned int currentSourceOpTimeSecs = currentSourceOpTime.getSecs();
+ unsigned int currentSourceLagThresholdSecs =
+ currentSourceOpTimeSecs + durationCount<Seconds>(_options.maxSyncSourceLagSecs);
+
+ for (size_t i = 0; i < _memberData.size(); i++) {
+ const auto& member = _memberData[i];
+ if (currentSourceLagThresholdSecs < member.getHeartbeatAppliedOpTime().getSecs() &&
+ _isEligibleSyncSource(i,
+ now,
+ lastOpTimeFetched,
+ ReadPreference::Nearest,
+ true /* firstAttempt */,
+ true /* shouldCheckStaleness */)) {
+ invariant(i != (size_t)_selfIndex,
+ str::stream()
+ << "Node " << i << " was eligible as a sync source for itself");
LOGV2(21834,
- "Choosing new sync source because the most recent OpTime of our sync "
- "source, {syncSource}, is {syncSourceOpTime} which is more than "
- "{maxSyncSourceLagSecs} behind member {otherMember} "
- "whose most recent OpTime is {otherMemberHearbeatAppliedOpTime}",
"Choosing new sync source because the most recent OpTime of our sync source "
"is more than maxSyncSourceLagSecs behind another member",
"syncSource"_attr = currentSource,
"syncSourceOpTime"_attr = currentSourceOpTime.toString(),
"maxSyncSourceLagSecs"_attr = _options.maxSyncSourceLagSecs,
- "otherMember"_attr = candidateConfig.getHostAndPort().toString(),
+ "otherMember"_attr = member.getHostAndPort().toString(),
"otherMemberHearbeatAppliedOpTime"_attr =
- it->getHeartbeatAppliedOpTime().toString());
- invariant(itIndex != _selfIndex);
+ member.getHeartbeatAppliedOpTime().toString());
+ return true;
+ }
+ }
+ }
+
+ // Change sync source if our current sync source is not a preferred sync source node choice due
+ // to non-staleness issues, such as being a non-voter when we are a voter, or being hidden, or
+ // any of the other conditions checked in _isEligibleSyncSource with firstAttempt=true, and
+ // another eligible node exists which does meet these criteria. Note that while we bypass
+ // staleness checks for our current node, we should not do this for a potential new node,
+ // because we could end up with a situation where shouldChangeSyncSource returns true, causing
+ // the sync source to be cleared, but then being reset to our previous sync source repeatedly
+ // because the new source is not actually valid. Note that _isEligibleSyncSource only checks for
+ // ReadPreference::Secondary*, so any choice besides those for the read preference is fine.
+ if (!_isEligibleSyncSource(currentSourceIndex,
+ now,
+ lastOpTimeFetched,
+ ReadPreference::Nearest,
+ true /* firstAttempt */,
+ false /* shouldCheckStaleness */)) {
+
+ for (size_t i = 0; i < _memberData.size(); i++) {
+ if (_isEligibleSyncSource(i,
+ now,
+ lastOpTimeFetched,
+ ReadPreference::Nearest,
+ true /* firstAttempt */,
+ true /* shouldCheckStaleness */)) {
+ invariant(i != (size_t)_selfIndex,
+ str::stream()
+ << "Node " << i << " was eligible as a sync source for itself");
+ LOGV2(5929000,
+ "Choosing new sync source because our current sync source does not satisfy "
+ "our strict criteria for candidates, but there is another member which does "
+ "satisfy these criteria",
+ "currentSyncSource"_attr = currentSource,
+ "eligibleCandidateSyncSource"_attr =
+ _rsConfig.getMemberAt(i).getHostAndPort().toString());
return true;
}
}
@@ -3235,7 +3274,8 @@ bool TopologyCoordinator::shouldChangeSyncSourceDueToPingTime(const HostAndPort&
now,
previousOpTimeFetched,
readPreference,
- true /* firstAttempt */)) {
+ true /* firstAttempt */,
+ true /* shouldCheckStaleness */)) {
LOGV2(4744901,
"Choosing new sync source because we have found another potential sync "
"source that is significantly closer than our current sync source",
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index 2da14343ef9..3e8466ffd21 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -937,7 +937,8 @@ private:
Date_t now,
const OpTime& lastOpTimeFetched,
ReadPreference readPreference,
- bool firstAttempt) const;
+ bool firstAttempt,
+ bool shouldCheckStaleness) const;
// Returns the current "ping" value for the given member by their address.
Milliseconds _getPing(const HostAndPort& host);