-rw-r--r--   jstests/replsets/chaining_removal.js             | 16
-rw-r--r--   src/mongo/db/repl/SConscript                     |  1
-rw-r--r--   src/mongo/db/repl/topology_coordinator_impl.cpp  | 48
3 files changed, 48 insertions, 17 deletions
diff --git a/jstests/replsets/chaining_removal.js b/jstests/replsets/chaining_removal.js
index 027de0215d6..744b6bb8de5 100644
--- a/jstests/replsets/chaining_removal.js
+++ b/jstests/replsets/chaining_removal.js
@@ -26,6 +26,16 @@
    var primary = replTest.getPrimary();
    replTest.awaitReplication();
+    // When setting up chaining on slow machines, we do not want slow writes or delayed heartbeats
+    // to cause our nodes to invalidate the sync source provided in the 'replSetSyncFrom' command.
+    // To achieve this, we disable the server parameter 'maxSyncSourceLagSecs' (see
+    // repl_settings_init.cpp and TopologyCoordinatorImpl::Options) in
+    // TopologyCoordinatorImpl::shouldChangeSyncSource().
+    assert.commandWorked(nodes[1].getDB('admin').runCommand(
+        {configureFailPoint: 'disableMaxSyncSourceLagSecs', mode: 'alwaysOn'}));
+    assert.commandWorked(nodes[4].getDB('admin').runCommand(
+        {configureFailPoint: 'disableMaxSyncSourceLagSecs', mode: 'alwaysOn'}));
+
    // Force node 1 to sync directly from node 0.
    syncFrom(nodes[1], nodes[0], replTest);
@@ -39,6 +49,12 @@
    };
    assert.writeOK(primary.getDB(name).foo.insert({x: 1}, options));
+    // Re-enable 'maxSyncSourceLagSecs' checking on sync source.
+    assert.commandWorked(nodes[1].getDB('admin').runCommand(
+        {configureFailPoint: 'disableMaxSyncSourceLagSecs', mode: 'off'}));
+    assert.commandWorked(nodes[4].getDB('admin').runCommand(
+        {configureFailPoint: 'disableMaxSyncSourceLagSecs', mode: 'off'}));
+
    var config = primary.getDB("local").system.replset.findOne();
    config.members.pop();
    config.version++;
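
The configureFailPoint calls above (the 'alwaysOn' pair and the matching 'off' pair) follow the generic fail point toggle pattern used throughout jstests. A minimal sketch of that pattern as a reusable shell helper, assuming the test's existing nodes array; the setFailPointOnNode name is illustrative and not part of this patch:

// Illustrative helper, not part of this patch: toggle a named fail point on one node.
// 'mode' is typically 'alwaysOn' or 'off', mirroring the calls in chaining_removal.js.
function setFailPointOnNode(node, failPointName, mode) {
    assert.commandWorked(
        node.getDB('admin').runCommand({configureFailPoint: failPointName, mode: mode}));
}

// Example usage against the nodes that are forced to chain in this test:
setFailPointOnNode(nodes[1], 'disableMaxSyncSourceLagSecs', 'alwaysOn');
setFailPointOnNode(nodes[4], 'disableMaxSyncSourceLagSecs', 'alwaysOn');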
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index 007a37c5c25..af7515855f9 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -250,6 +250,7 @@ env.Library('topology_coordinator_impl',
'repl_settings',
'rslog',
'topology_coordinator',
+ '$BUILD_DIR/mongo/util/fail_point',
])
env.CppUnitTest('repl_set_heartbeat_response_test',
diff --git a/src/mongo/db/repl/topology_coordinator_impl.cpp b/src/mongo/db/repl/topology_coordinator_impl.cpp
index 7450dabc576..53c79c80b03 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl.cpp
@@ -51,6 +51,7 @@
#include "mongo/db/server_parameters.h"
#include "mongo/rpc/metadata/repl_set_metadata.h"
#include "mongo/s/catalog/catalog_manager.h"
+#include "mongo/util/fail_point_service.h"
#include "mongo/util/hex.h"
#include "mongo/util/log.h"
#include "mongo/util/mongoutils/str.h"
@@ -67,6 +68,11 @@ const Seconds TopologyCoordinatorImpl::VoteLease::leaseTime = Seconds(30);
// must be before it will call for a priority takeover election.
MONGO_EXPORT_STARTUP_SERVER_PARAMETER(priorityTakeoverFreshnessWindowSeconds, int, 2);
+// If this fail point is enabled, TopologyCoordinatorImpl::shouldChangeSyncSource() will ignore
+// the option TopologyCoordinatorImpl::Options::maxSyncSourceLagSecs. The sync source will not be
+// re-evaluated if it lags behind another node by more than 'maxSyncSourceLagSecs' seconds.
+MONGO_FP_DECLARE(disableMaxSyncSourceLagSecs);
+
namespace {
template <typename T>
@@ -2413,26 +2419,34 @@ bool TopologyCoordinatorImpl::shouldChangeSyncSource(const HostAndPort& currentS
        return true;
    }

-    unsigned int currentSecs = currentSourceOpTime.getSecs();
-    unsigned int goalSecs = currentSecs + durationCount<Seconds>(_options.maxSyncSourceLagSecs);
+    if (MONGO_FAIL_POINT(disableMaxSyncSourceLagSecs)) {
+        log() << "disableMaxSyncSourceLagSecs fail point enabled - not checking the most recent "
+                 "OpTime, " << currentSourceOpTime.toString() << ", of our current sync source, "
+              << currentSource << ", against the OpTimes of the other nodes in this replica set.";
+    } else {
+        unsigned int currentSecs = currentSourceOpTime.getSecs();
+        unsigned int goalSecs = currentSecs + durationCount<Seconds>(_options.maxSyncSourceLagSecs);

-    for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin(); it != _hbdata.end();
-         ++it) {
-        const int itIndex = indexOfIterator(_hbdata, it);
-        const MemberConfig& candidateConfig = _rsConfig.getMemberAt(itIndex);
-        if (it->up() && (candidateConfig.isVoter() || !_selfConfig().isVoter()) &&
-            (candidateConfig.shouldBuildIndexes() || !_selfConfig().shouldBuildIndexes()) &&
-            it->getState().readable() && !_memberIsBlacklisted(candidateConfig, now) &&
-            goalSecs < it->getAppliedOpTime().getSecs()) {
-            log() << "re-evaluating sync source because our current sync source's most recent "
-                  << "OpTime is " << currentSourceOpTime.toString() << " which is more than "
-                  << _options.maxSyncSourceLagSecs << " behind member "
-                  << candidateConfig.getHostAndPort().toString() << " whose most recent OpTime is "
-                  << it->getAppliedOpTime().toString();
-            invariant(itIndex != _selfIndex);
-            return true;
+        for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
+             it != _hbdata.end();
+             ++it) {
+            const int itIndex = indexOfIterator(_hbdata, it);
+            const MemberConfig& candidateConfig = _rsConfig.getMemberAt(itIndex);
+            if (it->up() && (candidateConfig.isVoter() || !_selfConfig().isVoter()) &&
+                (candidateConfig.shouldBuildIndexes() || !_selfConfig().shouldBuildIndexes()) &&
+                it->getState().readable() && !_memberIsBlacklisted(candidateConfig, now) &&
+                goalSecs < it->getAppliedOpTime().getSecs()) {
+                log() << "re-evaluating sync source because our current sync source's most recent "
+                      << "OpTime is " << currentSourceOpTime.toString() << " which is more than "
+                      << _options.maxSyncSourceLagSecs << " behind member "
+                      << candidateConfig.getHostAndPort().toString()
+                      << " whose most recent OpTime is " << it->getAppliedOpTime().toString();
+                invariant(itIndex != _selfIndex);
+                return true;
+            }
        }
    }
+
    return false;
}
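
For reference, the check that the new fail point bypasses boils down to comparing OpTime seconds against _options.maxSyncSourceLagSecs. A minimal sketch of that decision in shell JavaScript, using plain numbers instead of the real MemberHeartbeatData plumbing; the function and parameter names are illustrative only:

// Illustrative only, not part of this patch: the core of the maxSyncSourceLagSecs rule.
// sourceSecs:    seconds component of the current sync source's last applied OpTime
// candidateSecs: seconds component of another eligible member's last applied OpTime
// maxLagSecs:    _options.maxSyncSourceLagSecs (see repl_settings_init.cpp)
// failPointOn:   whether disableMaxSyncSourceLagSecs is set to 'alwaysOn'
function shouldReevaluateSyncSource(sourceSecs, candidateSecs, maxLagSecs, failPointOn) {
    if (failPointOn) {
        return false;  // the lag comparison is skipped entirely while the fail point is enabled
    }
    var goalSecs = sourceSecs + maxLagSecs;
    return goalSecs < candidateSecs;  // candidate is more than maxLagSecs ahead of the source
}

assert.eq(true, shouldReevaluateSyncSource(100, 140, 30, false));  // 140 > 130: re-evaluate
assert.eq(false, shouldReevaluateSyncSource(100, 140, 30, true));  // fail point suppresses the check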