summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Vesselina Ratcheva <vesselina.ratcheva@10gen.com> 2018-06-25 14:38:41 -0400
committer Vesselina Ratcheva <vesselina.ratcheva@10gen.com> 2018-07-12 15:52:05 -0400
commit 069e6f06b4e0faef45661ac043c7a4ebcf026d96 (patch)
tree e1e6b7a404227320360d7ded76d11e0f2bd52141
parent 2e975546f4676e1ba65e3376c08bd43bee14305f (diff)
download mongo-069e6f06b4e0faef45661ac043c7a4ebcf026d96.tar.gz
SERVER-35623 Send a replSetStepUp command to an eligible candidate on stepdown
-rw-r--r-- jstests/replsets/election_handoff_basic.js 26
-rw-r--r-- jstests/replsets/election_handoff_flip.js 27
-rw-r--r-- jstests/replsets/election_handoff_higher_priority.js 31
-rw-r--r-- jstests/replsets/election_handoff_one_unelectable.js 29
-rw-r--r-- jstests/replsets/libs/election_handoff.js 73
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 41
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.h 5
7 files changed, 232 insertions, 0 deletions
diff --git a/jstests/replsets/election_handoff_basic.js b/jstests/replsets/election_handoff_basic.js
new file mode 100644
index 00000000000..c11a60612a2
--- /dev/null
+++ b/jstests/replsets/election_handoff_basic.js
@@ -0,0 +1,26 @@
+/**
+ * This is a basic test that checks that, with election handoff enabled, a primary that steps
+ * down sends a ReplSetStepUp request to an eligible candidate. It uses a two-node replica set,
+ * so there is only one secondary that can take over.
+ */
+
+(function() {
+ "use strict";
+ load("jstests/replsets/libs/election_handoff.js");  // provides ElectionHandoffTest
+
+ const testName = "election_handoff_vanilla";
+ const numNodes = 2;  // one primary plus a single possible handoff candidate
+ const rst = ReplSetTest({name: testName, nodes: numNodes});
+ const nodes = rst.nodeList();  // NOTE(review): 'nodes' is never read in this test
+ rst.startSet();
+
+ // Make sure there are no election timeouts firing for the duration of the test. This helps
+ // ensure that the test will only pass if the election handoff succeeds.
+ const config = rst.getReplSetConfig();
+ config.settings = {"electionTimeoutMillis": 12 * 60 * 60 * 1000};  // 12 hours, in milliseconds
+ rst.initiate(config);
+
+ ElectionHandoffTest.testElectionHandoff(rst, 0, 1);  // step down node 0; expect node 1 to step up
+
+ rst.stopSet();
+})(); \ No newline at end of file
diff --git a/jstests/replsets/election_handoff_flip.js b/jstests/replsets/election_handoff_flip.js
new file mode 100644
index 00000000000..6e6c6f7bd66
--- /dev/null
+++ b/jstests/replsets/election_handoff_flip.js
@@ -0,0 +1,27 @@
+/**
+ * This test uses a two-node replica set and exercises election handoff from one node to the other,
+ * then back to the first one.
+ */
+
+(function() {
+ "use strict";
+ load("jstests/replsets/libs/election_handoff.js");  // provides ElectionHandoffTest
+
+ const testName = "election_handoff_flip";
+ const numNodes = 2;
+ const rst = ReplSetTest({name: testName, nodes: numNodes});
+ const nodes = rst.nodeList();  // NOTE(review): 'nodes' is never read in this test
+ rst.startSet();
+
+ // Make sure there are no election timeouts firing for the duration of the test. This helps
+ // ensure that the test will only pass if the election handoff succeeds.
+ const config = rst.getReplSetConfig();
+ config.settings = {"electionTimeoutMillis": 12 * 60 * 60 * 1000};  // 12 hours, in milliseconds
+ rst.initiate(config);
+
+ ElectionHandoffTest.testElectionHandoff(rst, 0, 1);  // first handoff: node 0 -> node 1
+ sleep(ElectionHandoffTest.stepDownPeriodSecs * 1000);  // wait out the first stepdown period; presumably so node 0 is electable again -- confirm
+ ElectionHandoffTest.testElectionHandoff(rst, 1, 0);  // second handoff: node 1 -> node 0
+
+ rst.stopSet();
+})(); \ No newline at end of file
diff --git a/jstests/replsets/election_handoff_higher_priority.js b/jstests/replsets/election_handoff_higher_priority.js
new file mode 100644
index 00000000000..78a866a1201
--- /dev/null
+++ b/jstests/replsets/election_handoff_higher_priority.js
@@ -0,0 +1,31 @@
+/**
+ * This is a test that checks that, with election handoff enabled, a primary that steps
+ * down sends a ReplSetStepUp request to an eligible candidate. This test uses a three node
+ * replica set, where one of the secondaries has a higher priority than the other. The test
+ * expects the higher-priority secondary to be chosen as the election handoff candidate.
+ */
+
+(function() {
+ "use strict";
+ load("jstests/replsets/libs/election_handoff.js");  // provides ElectionHandoffTest
+
+ const testName = "election_handoff_higher_priority";
+ const numNodes = 3;
+ const rst = ReplSetTest({name: testName, nodes: numNodes});
+ const nodes = rst.nodeList();  // NOTE(review): 'nodes' is never read in this test
+ rst.startSet();
+
+ const config = rst.getReplSetConfig();
+ config.members[0].priority = 3;  // node 0 is the initial primary passed to testElectionHandoff below
+ config.members[1].priority = 1;  // lower-priority secondary
+ config.members[2].priority = 2;  // higher-priority secondary: the expected handoff candidate
+
+ // Make sure there are no election timeouts firing for the duration of the test. This helps
+ // ensure that the test will only pass if the election handoff succeeds.
+ config.settings = {"electionTimeoutMillis": 12 * 60 * 60 * 1000};  // 12 hours, in milliseconds
+ rst.initiate(config);
+
+ ElectionHandoffTest.testElectionHandoff(rst, 0, 2);  // step down node 0; expect node 2 to step up
+
+ rst.stopSet();
+})(); \ No newline at end of file
diff --git a/jstests/replsets/election_handoff_one_unelectable.js b/jstests/replsets/election_handoff_one_unelectable.js
new file mode 100644
index 00000000000..97546cbb1ea
--- /dev/null
+++ b/jstests/replsets/election_handoff_one_unelectable.js
@@ -0,0 +1,29 @@
+/**
+ * This is a basic test that checks that, with election handoff enabled, a primary that steps down
+ * sends a ReplSetStepUp request to an eligible candidate. This test uses a three-node replica
+ * set, where one of the secondaries is unelectable, so the test expects the other one to get
+ * chosen for election handoff.
+ */
+
+(function() {
+ "use strict";
+ load("jstests/replsets/libs/election_handoff.js");  // provides ElectionHandoffTest
+
+ const testName = "election_handoff_one_unelectable";
+ const numNodes = 3;
+ const rst = ReplSetTest({name: testName, nodes: numNodes});
+ const nodes = rst.nodeList();  // NOTE(review): 'nodes' is never read in this test
+ rst.startSet();
+
+ const config = rst.getReplSetConfig();
+ config.members[1].priority = 0;  // this is the unelectable secondary the test description refers to
+
+ // Make sure there are no election timeouts firing for the duration of the test. This helps
+ // ensure that the test will only pass if the election handoff succeeds.
+ config.settings = {"electionTimeoutMillis": 12 * 60 * 60 * 1000};  // 12 hours, in milliseconds
+ rst.initiate(config);
+
+ ElectionHandoffTest.testElectionHandoff(rst, 0, 2);  // step down node 0; expect node 2 (not node 1) to step up
+
+ rst.stopSet();
+})(); \ No newline at end of file
diff --git a/jstests/replsets/libs/election_handoff.js b/jstests/replsets/libs/election_handoff.js
new file mode 100644
index 00000000000..95b5effbc04
--- /dev/null
+++ b/jstests/replsets/libs/election_handoff.js
@@ -0,0 +1,73 @@
+"use strict";
+
+/**
+ * This file is used for testing election handoff.
+ */
+
+var ElectionHandoffTest = (function() {
+
+ load("jstests/libs/check_log.js");
+ load("jstests/replsets/rslib.js");
+
+ const kStepDownPeriodSecs = 30;  // used as the replSetStepDown duration; exported as stepDownPeriodSecs
+
+ /**
+ * Exercises and validates an election handoff scenario by stepping down the primary and
+ * ensuring that the node at "expectedCandidateId" is stepped up in its place. The desired
+ * configuration of the replica set is passed in as its ReplSetTest instance.
+ */
+ function testElectionHandoff(rst, initialPrimaryId, expectedCandidateId) {
+ const config = rst.getReplSetConfigFromNode();
+ const numNodes = config.members.length;
+ const memberInfo = config.members[expectedCandidateId];  // config entry of the expected candidate
+
+ assert.neq(
+ true, memberInfo["arbiterOnly"], "Election handoff candidate cannot be an arbiter.");
+ assert.neq(
+ 0, memberInfo["priority"], "Election handoff candidate cannot have zero priority");
+
+ rst.awaitNodesAgreeOnPrimary();
+ const primary = rst.getPrimary();
+ assert.eq(rst.nodes[initialPrimaryId], primary);  // the caller's initialPrimaryId must be the current primary
+
+ // Store the term for future verification.
+ const status = assert.commandWorked(primary.adminCommand({replSetGetStatus: 1}));
+ const term = +status.term;  // unary plus converts the reported term to a plain number
+
+ jsTestLog("Enabling election handoff...");
+
+ // Enable election handoff.
+ assert.commandWorked(primary.adminCommand({setParameter: 1, handOffElectionOnStepdown: 1}));
+
+ jsTestLog("Stepping down primary...");
+
+ // Step down the current primary.
+ assert.adminCommandWorkedAllowingNetworkError(primary, {
+ replSetStepDown: kStepDownPeriodSecs,
+ secondaryCatchUpPeriodSecs: kStepDownPeriodSecs / 2  // half of the stepdown period
+ });
+
+ jsTestLog(`Checking that the secondary with id ${expectedCandidateId} is stepped up...`);
+
+ const expectedCandidate = rst.nodes[expectedCandidateId];
+
+ // The checkLog() function blocks until the log line appears.
+ checkLog.contains(expectedCandidate, "Starting an election due to step up request");
+
+ // If there are only two nodes in the set, verify that the old primary voted "yes".
+ if (numNodes === 2) {
+ checkLog.contains(
+ expectedCandidate,
+ `VoteRequester(term ${term} dry run) received a yes vote from ${primary.host}`);
+ checkLog.contains(
+ expectedCandidate,
+ `VoteRequester(term ${term+1}) received a yes vote from ${primary.host}`);
+ }
+
+ rst.awaitNodesAgreeOnPrimary();
+ assert.eq(rst.nodes[expectedCandidateId], rst.getPrimary());  // the expected candidate must now be primary
+ }
+
+ return {testElectionHandoff: testElectionHandoff, stepDownPeriodSecs: kStepDownPeriodSecs};
+
+})(); \ No newline at end of file
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 56e5fdf774f..ae9ca6fc713 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -105,6 +105,8 @@ const OperationContext::Decoration<bool> alwaysAllowNonLocalWrites =
MONGO_EXPORT_SERVER_PARAMETER(numInitialSyncAttempts, int, 10);
+MONGO_EXPORT_SERVER_PARAMETER(handOffElectionOnStepdown, bool, false);
+
// Number of seconds between noop writer writes.
MONGO_EXPORT_STARTUP_SERVER_PARAMETER(periodicNoopIntervalSecs, int, 10);
@@ -1759,13 +1761,52 @@ Status ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
// Stepdown success!
onExitGuard.Dismiss();
updateMemberState();
+
// Schedule work to (potentially) step back up once the stepdown period has ended.
_scheduleWorkAt(stepDownUntil, [=](const executor::TaskExecutor::CallbackArgs& cbData) {
_handleTimePassing(cbData);
});
+
+ // If election handoff is enabled, schedule a step-up immediately instead of waiting for the
+ // election timeout to expire.
+ if (!force && handOffElectionOnStepdown.load()) {
+ _performElectionHandoff();
+ }
return Status::OK();
}
+void ReplicationCoordinatorImpl::_performElectionHandoff() {  // picks a handoff candidate and sends it replSetStepUp
+ stdx::lock_guard<stdx::mutex> lock(_mutex);  // _mutex stays held until this function returns
+ auto candidateIndex = _topCoord->chooseElectionHandoffCandidate();
+
+ if (candidateIndex < 0) {  // a negative index is treated as "no candidate available"
+ log() << "Could not find node to hand off election to.";
+ return;
+ }
+
+ auto target = _rsConfig.getMemberAt(candidateIndex).getHostAndPort();
+ executor::RemoteCommandRequest request(target, "admin", BSON("replSetStepUp" << 1), nullptr);
+ log() << "Handing off election to " << target;
+
+ auto callbackHandleSW = _replExecutor->scheduleRemoteCommand(
+ request, [target](const executor::TaskExecutor::RemoteCommandCallbackArgs& callbackData) {  // 'target' is captured by copy for the log lines below
+ auto status = callbackData.response.status;
+
+ if (status.isOK()) {
+ LOG(1) << "replSetStepUp request to " << target << " succeeded with response -- "
+ << callbackData.response.data;
+ } else {
+ log() << "replSetStepUp request to " << target << " failed due to " << status;  // only logged; nothing here retries
+ }
+ });
+
+ auto callbackHandleStatus = callbackHandleSW.getStatus();  // scheduling the request can itself fail
+ if (!callbackHandleStatus.isOK()) {
+ error() << "Failed to schedule ReplSetStepUp request to " << target
+ << " for election handoff: " << callbackHandleStatus;
+ }
+}
+
void ReplicationCoordinatorImpl::_handleTimePassing(
const executor::TaskExecutor::CallbackArgs& cbData) {
if (!cbData.status.isOK()) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 0416df74823..a0175b86458 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -596,6 +596,11 @@ private:
void _handleTimePassing(const executor::TaskExecutor::CallbackArgs& cbData);
/**
+ * Chooses a candidate for election handoff and sends a ReplSetStepUp command to it.
+ */
+ void _performElectionHandoff();
+
+ /**
* Helper method for _awaitReplication that takes an already locked unique_lock, but leaves
* operation timing to the caller.
*/