diff options
author | Andy Schwerin <schwerin@mongodb.com> | 2014-10-28 14:28:13 -0400 |
---|---|---|
committer | Andy Schwerin <schwerin@mongodb.com> | 2014-10-28 18:32:59 -0400 |
commit | df128f1328ce12ac098aecafa43cb5e0eda6d3e0 (patch) | |
tree | 6a061c0836532bf9267bb95b4360a4deb97e9526 /src | |
parent | 385f03dc7205ef60bbb9cb8b475afd9c802bc67d (diff) | |
download | mongo-df128f1328ce12ac098aecafa43cb5e0eda6d3e0.tar.gz |
SERVER-15771 Do not step down a remote primary unless you plan to replace it yourself.
Also, when stepping down for a higher priority node, set _electionSleepUntil,
not _stepDownUntil. This way, lower priority nodes will still consider you
electable, and defer to you.
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/repl/topology_coordinator_impl.cpp | 37 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator_impl_test.cpp | 46 |
2 files changed, 57 insertions, 26 deletions
diff --git a/src/mongo/db/repl/topology_coordinator_impl.cpp b/src/mongo/db/repl/topology_coordinator_impl.cpp index 212519daf10..dbd47960699 100644 --- a/src/mongo/db/repl/topology_coordinator_impl.cpp +++ b/src/mongo/db/repl/topology_coordinator_impl.cpp @@ -926,24 +926,33 @@ namespace { lastOpApplied)) { const OpTime latestOpTime = _latestKnownOpTime(lastOpApplied); - log() << "stepping down " - << currentPrimaryMember.getHostAndPort().toString() - << " (priority " << currentPrimaryMember.getPriority() << "), " - << highestPriorityMember.getHostAndPort().toString() - << " is priority " << highestPriorityMember.getPriority() - << " and " - << (latestOpTime.getSecs() - highestPriorityMemberOptime.getSecs()) - << " seconds behind"; if (_iAmPrimary()) { + log() << "Stepping down self (priority " + << currentPrimaryMember.getPriority() << ") because " + << highestPriorityMember.getHostAndPort() << " has higher priority " + << highestPriorityMember.getPriority() << " and is only " + << (latestOpTime.getSecs() - highestPriorityMemberOptime.getSecs()) + << " seconds behind me"; const Date_t until = now + LastVote::leaseTime.total_milliseconds() + kHeartbeatInterval.total_milliseconds(); - if (_stepDownUntil < until) { - _stepDownUntil = until; + if (_electionSleepUntil < until) { + _electionSleepUntil = until; } return _stepDownSelf(); } - else { + else if ((highestPriorityIndex == _selfIndex) && + (_electionSleepUntil <= now)) { + // If this node is the highest priority node, and it is not in + // an inter-election sleep period, ask the current primary to step down. + // This is an optimization, because the remote primary will almost certainly + // notice this node's electability promptly, via its own heartbeat process. 
+ log() << "Requesting that " << currentPrimaryMember.getHostAndPort() + << " (priority " << currentPrimaryMember.getPriority() + << ") step down because I have higher priority " + << highestPriorityMember.getPriority() << " and am only " + << (latestOpTime.getSecs() - highestPriorityMemberOptime.getSecs()) + << " seconds behind it"; int primaryIndex = _currentPrimaryIndex; _currentPrimaryIndex = -1; return HeartbeatResponseAction::makeStepDownRemoteAction(primaryIndex); @@ -1461,7 +1470,9 @@ namespace { } void TopologyCoordinatorImpl::setElectionSleepUntil(Date_t newTime) { - _electionSleepUntil = newTime; + if (_electionSleepUntil < newTime) { + _electionSleepUntil = newTime; + } } OpTime TopologyCoordinatorImpl::getElectionTime() const { @@ -1592,7 +1603,7 @@ namespace { result |= NoPriority; } if (hbData.getState() != MemberState::RS_SECONDARY) { - result |=NotSecondary; + result |= NotSecondary; } if (!_isOpTimeCloseEnoughToLatestToElect(hbData.getOpTime(), lastOpApplied)) { result |= NotCloseEnoughToLatestOptime; diff --git a/src/mongo/db/repl/topology_coordinator_impl_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_test.cpp index f0d22e4b7cc..fe97bbf050e 100644 --- a/src/mongo/db/repl/topology_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_impl_test.cpp @@ -1328,9 +1328,9 @@ namespace { } TEST_F(HeartbeatResponseTestOneRetry, DecideToStepDownRemotePrimary) { - // Confirm that action responses can come back from retries; in this, expect a StepDownSelf - // action. - + // Confirm that action responses can come back from retries; in this, expect a + // StepDownRemotePrimary action. + // make self primary ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); makeSelfPrimary(OpTime(5,0)); @@ -1358,7 +1358,7 @@ namespace { TEST_F(HeartbeatResponseTestOneRetry, DecideToStepDownSelf) { // Confirm that action responses can come back from retries; in this, expect a StepDownSelf // action. 
- + // acknowledge the other member so that we see a majority HeartbeatResponseAction action = receiveDownHeartbeat(HostAndPort("host3"), "rs0", @@ -1450,9 +1450,9 @@ namespace { } TEST_F(HeartbeatResponseTestTwoRetries, DecideToStepDownRemotePrimary) { - // Confirm that action responses can come back from retries; in this, expect a StepDownSelf - // action. - + // Confirm that action responses can come back from retries; in this, expect a + // StepDownRemotePrimary action. + // make self primary ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); makeSelfPrimary(OpTime(5,0)); @@ -1480,7 +1480,7 @@ namespace { TEST_F(HeartbeatResponseTestTwoRetries, DecideToStepDownSelf) { // Confirm that action responses can come back from retries; in this, expect a StepDownSelf // action. - + // acknowledge the other member so that we see a majority HeartbeatResponseAction action = receiveDownHeartbeat(HostAndPort("host3"), "rs0", @@ -1513,7 +1513,7 @@ namespace { TEST_F(HeartbeatResponseTestTwoRetries, DecideToStartElection) { // Confirm that action responses can come back from retries; in this, expect a StartElection // action. - + // acknowledge the other member so that we see a majority OpTime election = OpTime(400,0); OpTime lastOpTimeApplied = OpTime(300,0); @@ -1687,8 +1687,8 @@ namespace { TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataStepDownPrimaryForHighPriorityFreshNode) { // In this test, the Topology coordinator sees a PRIMARY ("host2") and then sees a higher - // priority and similarly fresh node ("host3"). As a result it responds with a - // StepDownRemote action for the PRIMARY ("host2"). + // priority and similarly fresh node ("host3"). However, since the coordinator's node + // (host1) is not the higher priority node, it takes no action. 
updateConfig(BSON("_id" << "rs0" << "version" << 6 << "members" << BSON_ARRAY( @@ -1718,13 +1718,15 @@ namespace { election, slightlyLessFreshLastOpTimeApplied, lastOpTimeApplied); - ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, nextAction.getAction()); - ASSERT_EQUALS(1, nextAction.getPrimaryConfigIndex()); + ASSERT_EQUALS(HeartbeatResponseAction::NoAction, nextAction.getAction()); } TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataStepDownSelfForHighPriorityFreshNode) { // In this test, the Topology coordinator becomes PRIMARY and then sees a higher priority // and equally fresh node ("host3"). As a result it responds with a StepDownSelf action. + // + // Despite having stepped down, we should remain electable, in order to dissuade lower + // priority nodes from standing for election. updateConfig(BSON("_id" << "rs0" << "version" << 6 << "members" << BSON_ARRAY( @@ -1735,6 +1737,7 @@ namespace { 0); OpTime election = OpTime(1000,0); + getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); makeSelfPrimary(election); ASSERT_EQUALS(0, getCurrentPrimaryIndex()); @@ -1747,6 +1750,23 @@ namespace { election); ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction()); ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex()); + + // Process a heartbeat response to confirm that this node, which is no longer primary, + // still tells other nodes that it is electable. This will stop lower priority nodes + // from standing for election. 
+ ReplSetHeartbeatArgs hbArgs; + hbArgs.setSetName("rs0"); + hbArgs.setProtocolVersion(1); + hbArgs.setConfigVersion(6); + hbArgs.setSenderId(1); + hbArgs.setSenderHost(HostAndPort("host3", 27017)); + ReplSetHeartbeatResponse hbResp; + ASSERT_OK(getTopoCoord().prepareHeartbeatResponse(now(), + hbArgs, + "rs0", + election, + &hbResp)); + ASSERT(!hbResp.hasIsElectable() || hbResp.isElectable()) << hbResp.toBSON().toString(); } TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataDoNotStepDownSelfForHighPriorityStaleNode) { |