diff options
author | Vesselina Ratcheva <vesselina.ratcheva@10gen.com> | 2018-06-25 14:38:41 -0400 |
---|---|---|
committer | Vesselina Ratcheva <vesselina.ratcheva@10gen.com> | 2018-07-12 15:52:05 -0400 |
commit | 069e6f06b4e0faef45661ac043c7a4ebcf026d96 (patch) | |
tree | e1e6b7a404227320360d7ded76d11e0f2bd52141 | |
parent | 2e975546f4676e1ba65e3376c08bd43bee14305f (diff) | |
download | mongo-069e6f06b4e0faef45661ac043c7a4ebcf026d96.tar.gz |
SERVER-35623 Send a replSetStepUp command to an eligible candidate on stepdown
-rw-r--r-- | jstests/replsets/election_handoff_basic.js | 26 | ||||
-rw-r--r-- | jstests/replsets/election_handoff_flip.js | 27 | ||||
-rw-r--r-- | jstests/replsets/election_handoff_higher_priority.js | 31 | ||||
-rw-r--r-- | jstests/replsets/election_handoff_one_unelectable.js | 29 | ||||
-rw-r--r-- | jstests/replsets/libs/election_handoff.js | 73 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 41 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.h | 5 |
7 files changed, 232 insertions, 0 deletions
diff --git a/jstests/replsets/election_handoff_basic.js b/jstests/replsets/election_handoff_basic.js new file mode 100644 index 00000000000..c11a60612a2 --- /dev/null +++ b/jstests/replsets/election_handoff_basic.js @@ -0,0 +1,26 @@ +/** + * This is a basic test that checks that, with election handoff enabled, a primary that steps + * down sends a ReplSetStepUp request to an eligible candidate. It uses a two-node replica set, + * so there is only one secondary that can take over. + */ + +(function() { + "use strict"; + load("jstests/replsets/libs/election_handoff.js"); + + const testName = "election_handoff_vanilla"; + const numNodes = 2; + const rst = ReplSetTest({name: testName, nodes: numNodes}); + const nodes = rst.nodeList(); + rst.startSet(); + + // Make sure there are no election timeouts firing for the duration of the test. This helps + // ensure that the test will only pass if the election handoff succeeds. + const config = rst.getReplSetConfig(); + config.settings = {"electionTimeoutMillis": 12 * 60 * 60 * 1000}; + rst.initiate(config); + + ElectionHandoffTest.testElectionHandoff(rst, 0, 1); + + rst.stopSet(); +})();
\ No newline at end of file diff --git a/jstests/replsets/election_handoff_flip.js b/jstests/replsets/election_handoff_flip.js new file mode 100644 index 00000000000..6e6c6f7bd66 --- /dev/null +++ b/jstests/replsets/election_handoff_flip.js @@ -0,0 +1,27 @@ +/** + * This test uses a two-node replica set and exercises election handoff from one node to the other, + * then back to the first one. + */ + +(function() { + "use strict"; + load("jstests/replsets/libs/election_handoff.js"); + + const testName = "election_handoff_flip"; + const numNodes = 2; + const rst = ReplSetTest({name: testName, nodes: numNodes}); + const nodes = rst.nodeList(); + rst.startSet(); + + // Make sure there are no election timeouts firing for the duration of the test. This helps + // ensure that the test will only pass if the election handoff succeeds. + const config = rst.getReplSetConfig(); + config.settings = {"electionTimeoutMillis": 12 * 60 * 60 * 1000}; + rst.initiate(config); + + ElectionHandoffTest.testElectionHandoff(rst, 0, 1); + sleep(ElectionHandoffTest.stepDownPeriodSecs * 1000); + ElectionHandoffTest.testElectionHandoff(rst, 1, 0); + + rst.stopSet(); +})();
\ No newline at end of file diff --git a/jstests/replsets/election_handoff_higher_priority.js b/jstests/replsets/election_handoff_higher_priority.js new file mode 100644 index 00000000000..78a866a1201 --- /dev/null +++ b/jstests/replsets/election_handoff_higher_priority.js @@ -0,0 +1,31 @@ +/** + * This is a test that checks that, with election handoff enabled, a primary that steps + * down sends a ReplSetStepUp request to an eligible candidate. This test uses a three node + * replica set, where one of the secondaries has a higher priority than the other. The test + * expects that that secondary gets chosen as the election handoff candidate. + */ + +(function() { + "use strict"; + load("jstests/replsets/libs/election_handoff.js"); + + const testName = "election_handoff_higher_priority"; + const numNodes = 3; + const rst = ReplSetTest({name: testName, nodes: numNodes}); + const nodes = rst.nodeList(); + rst.startSet(); + + const config = rst.getReplSetConfig(); + config.members[0].priority = 3; + config.members[1].priority = 1; + config.members[2].priority = 2; + + // Make sure there are no election timeouts firing for the duration of the test. This helps + // ensure that the test will only pass if the election handoff succeeds. + config.settings = {"electionTimeoutMillis": 12 * 60 * 60 * 1000}; + rst.initiate(config); + + ElectionHandoffTest.testElectionHandoff(rst, 0, 2); + + rst.stopSet(); +})();
\ No newline at end of file diff --git a/jstests/replsets/election_handoff_one_unelectable.js b/jstests/replsets/election_handoff_one_unelectable.js new file mode 100644 index 00000000000..97546cbb1ea --- /dev/null +++ b/jstests/replsets/election_handoff_one_unelectable.js @@ -0,0 +1,29 @@ +/** + * This is a basic test that checks that, with election handoff enabled, a primary that steps down + * sends a ReplSetStepUp request to an eligible candidate. This test uses a three-node replica + * set, where one of the secondaries is unelectable, so the test expects the other one to get + * chosen for election handoff. + */ + +(function() { + "use strict"; + load("jstests/replsets/libs/election_handoff.js"); + + const testName = "election_handoff_one_unelectable"; + const numNodes = 3; + const rst = ReplSetTest({name: testName, nodes: numNodes}); + const nodes = rst.nodeList(); + rst.startSet(); + + const config = rst.getReplSetConfig(); + config.members[1].priority = 0; + + // Make sure there are no election timeouts firing for the duration of the test. This helps + // ensure that the test will only pass if the election handoff succeeds. + config.settings = {"electionTimeoutMillis": 12 * 60 * 60 * 1000}; + rst.initiate(config); + + ElectionHandoffTest.testElectionHandoff(rst, 0, 2); + + rst.stopSet(); +})();
\ No newline at end of file diff --git a/jstests/replsets/libs/election_handoff.js b/jstests/replsets/libs/election_handoff.js new file mode 100644 index 00000000000..95b5effbc04 --- /dev/null +++ b/jstests/replsets/libs/election_handoff.js @@ -0,0 +1,73 @@ +"use strict"; + +/** + * This file is used for testing election handoff. + */ + +var ElectionHandoffTest = (function() { + + load("jstests/libs/check_log.js"); + load("jstests/replsets/rslib.js"); + + const kStepDownPeriodSecs = 30; + + /** + * Exercises and validates an election handoff scenario by stepping down the primary and + * ensuring that the node at "expectedCandidateId" is stepped up in its place. The desired + * configuration of the replica set is passed in as its ReplSetTest instance. + */ + function testElectionHandoff(rst, initialPrimaryId, expectedCandidateId) { + const config = rst.getReplSetConfigFromNode(); + const numNodes = config.members.length; + const memberInfo = config.members[expectedCandidateId]; + + assert.neq( + true, memberInfo["arbiterOnly"], "Election handoff candidate cannot be an arbiter."); + assert.neq( + 0, memberInfo["priority"], "Election handoff candidate cannot have zero priority"); + + rst.awaitNodesAgreeOnPrimary(); + const primary = rst.getPrimary(); + assert.eq(rst.nodes[initialPrimaryId], primary); + + // Store the term for future verification. + const status = assert.commandWorked(primary.adminCommand({replSetGetStatus: 1})); + const term = +status.term; + + jsTestLog("Enabling election handoff..."); + + // Enable election handoff. + assert.commandWorked(primary.adminCommand({setParameter: 1, handOffElectionOnStepdown: 1})); + + jsTestLog("Stepping down primary..."); + + // Step down the current primary. 
+ assert.adminCommandWorkedAllowingNetworkError(primary, { + replSetStepDown: kStepDownPeriodSecs, + secondaryCatchUpPeriodSecs: kStepDownPeriodSecs / 2 + }); + + jsTestLog(`Checking that the secondary with id ${expectedCandidateId} is stepped up...`); + + const expectedCandidate = rst.nodes[expectedCandidateId]; + + // The checkLog() function blocks until the log line appears. + checkLog.contains(expectedCandidate, "Starting an election due to step up request"); + + // If there are only two nodes in the set, verify that the old primary voted "yes". + if (numNodes === 2) { + checkLog.contains( + expectedCandidate, + `VoteRequester(term ${term} dry run) received a yes vote from ${primary.host}`); + checkLog.contains( + expectedCandidate, + `VoteRequester(term ${term+1}) received a yes vote from ${primary.host}`); + } + + rst.awaitNodesAgreeOnPrimary(); + assert.eq(rst.nodes[expectedCandidateId], rst.getPrimary()); + } + + return {testElectionHandoff: testElectionHandoff, stepDownPeriodSecs: kStepDownPeriodSecs}; + +})();
\ No newline at end of file diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 56e5fdf774f..ae9ca6fc713 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -105,6 +105,8 @@ const OperationContext::Decoration<bool> alwaysAllowNonLocalWrites = MONGO_EXPORT_SERVER_PARAMETER(numInitialSyncAttempts, int, 10); +MONGO_EXPORT_SERVER_PARAMETER(handOffElectionOnStepdown, bool, false); + // Number of seconds between noop writer writes. MONGO_EXPORT_STARTUP_SERVER_PARAMETER(periodicNoopIntervalSecs, int, 10); @@ -1759,13 +1761,52 @@ Status ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx, // Stepdown success! onExitGuard.Dismiss(); updateMemberState(); + // Schedule work to (potentially) step back up once the stepdown period has ended. _scheduleWorkAt(stepDownUntil, [=](const executor::TaskExecutor::CallbackArgs& cbData) { _handleTimePassing(cbData); }); + + // If election handoff is enabled, schedule a step-up immediately instead of waiting for the + // election timeout to expire. 
+ if (!force && handOffElectionOnStepdown.load()) { + _performElectionHandoff(); + } return Status::OK(); } +void ReplicationCoordinatorImpl::_performElectionHandoff() { + stdx::lock_guard<stdx::mutex> lock(_mutex); + auto candidateIndex = _topCoord->chooseElectionHandoffCandidate(); + + if (candidateIndex < 0) { + log() << "Could not find node to hand off election to."; + return; + } + + auto target = _rsConfig.getMemberAt(candidateIndex).getHostAndPort(); + executor::RemoteCommandRequest request(target, "admin", BSON("replSetStepUp" << 1), nullptr); + log() << "Handing off election to " << target; + + auto callbackHandleSW = _replExecutor->scheduleRemoteCommand( + request, [target](const executor::TaskExecutor::RemoteCommandCallbackArgs& callbackData) { + auto status = callbackData.response.status; + + if (status.isOK()) { + LOG(1) << "replSetStepUp request to " << target << " succeeded with response -- " + << callbackData.response.data; + } else { + log() << "replSetStepUp request to " << target << " failed due to " << status; + } + }); + + auto callbackHandleStatus = callbackHandleSW.getStatus(); + if (!callbackHandleStatus.isOK()) { + error() << "Failed to schedule ReplSetStepUp request to " << target + << " for election handoff: " << callbackHandleStatus; + } +} + void ReplicationCoordinatorImpl::_handleTimePassing( const executor::TaskExecutor::CallbackArgs& cbData) { if (!cbData.status.isOK()) { diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index 0416df74823..a0175b86458 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -596,6 +596,11 @@ private: void _handleTimePassing(const executor::TaskExecutor::CallbackArgs& cbData); /** + * Chooses a candidate for election handoff and sends a ReplSetStepUp command to it. 
+ */ + void _performElectionHandoff(); + + /** * Helper method for _awaitReplication that takes an already locked unique_lock, but leaves * operation timing to the caller. */ |