summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Andy Schwerin <schwerin@mongodb.com> 2014-10-28 14:28:13 -0400
committer: Andy Schwerin <schwerin@mongodb.com> 2014-10-28 18:32:59 -0400
commit: df128f1328ce12ac098aecafa43cb5e0eda6d3e0 (patch)
tree: 6a061c0836532bf9267bb95b4360a4deb97e9526 /src
parent: 385f03dc7205ef60bbb9cb8b475afd9c802bc67d (diff)
download: mongo-df128f1328ce12ac098aecafa43cb5e0eda6d3e0.tar.gz
SERVER-15771 Do not step down a remote primary unless you plan to replace it yourself.
Also, when stepping down for a higher priority node, set _electionSleepUntil, not _stepDownUntil. This way, lower priority nodes will still consider you electable, and defer to you.
Diffstat (limited to 'src')
-rw-r--r-- src/mongo/db/repl/topology_coordinator_impl.cpp 37
-rw-r--r-- src/mongo/db/repl/topology_coordinator_impl_test.cpp 46
2 files changed, 57 insertions, 26 deletions
diff --git a/src/mongo/db/repl/topology_coordinator_impl.cpp b/src/mongo/db/repl/topology_coordinator_impl.cpp
index 212519daf10..dbd47960699 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl.cpp
@@ -926,24 +926,33 @@ namespace {
lastOpApplied)) {
const OpTime latestOpTime = _latestKnownOpTime(lastOpApplied);
- log() << "stepping down "
- << currentPrimaryMember.getHostAndPort().toString()
- << " (priority " << currentPrimaryMember.getPriority() << "), "
- << highestPriorityMember.getHostAndPort().toString()
- << " is priority " << highestPriorityMember.getPriority()
- << " and "
- << (latestOpTime.getSecs() - highestPriorityMemberOptime.getSecs())
- << " seconds behind";
if (_iAmPrimary()) {
+ log() << "Stepping down self (priority "
+ << currentPrimaryMember.getPriority() << ") because "
+ << highestPriorityMember.getHostAndPort() << " has higher priority "
+ << highestPriorityMember.getPriority() << " and is only "
+ << (latestOpTime.getSecs() - highestPriorityMemberOptime.getSecs())
+ << " seconds behind me";
const Date_t until = now +
LastVote::leaseTime.total_milliseconds() +
kHeartbeatInterval.total_milliseconds();
- if (_stepDownUntil < until) {
- _stepDownUntil = until;
+ if (_electionSleepUntil < until) {
+ _electionSleepUntil = until;
}
return _stepDownSelf();
}
- else {
+ else if ((highestPriorityIndex == _selfIndex) &&
+ (_electionSleepUntil <= now)) {
+ // If this node is the highest priority node, and it is not in
+ // an inter-election sleep period, ask the current primary to step down.
+ // This is an optimization, because the remote primary will almost certainly
+ // notice this node's electability promptly, via its own heartbeat process.
+ log() << "Requesting that " << currentPrimaryMember.getHostAndPort()
+ << " (priority " << currentPrimaryMember.getPriority()
+ << ") step down because I have higher priority "
+ << highestPriorityMember.getPriority() << " and am only "
+ << (latestOpTime.getSecs() - highestPriorityMemberOptime.getSecs())
+ << " seconds behind it";
int primaryIndex = _currentPrimaryIndex;
_currentPrimaryIndex = -1;
return HeartbeatResponseAction::makeStepDownRemoteAction(primaryIndex);
@@ -1461,7 +1470,9 @@ namespace {
}
void TopologyCoordinatorImpl::setElectionSleepUntil(Date_t newTime) {
- _electionSleepUntil = newTime;
+ if (_electionSleepUntil < newTime) {
+ _electionSleepUntil = newTime;
+ }
}
OpTime TopologyCoordinatorImpl::getElectionTime() const {
@@ -1592,7 +1603,7 @@ namespace {
result |= NoPriority;
}
if (hbData.getState() != MemberState::RS_SECONDARY) {
- result |=NotSecondary;
+ result |= NotSecondary;
}
if (!_isOpTimeCloseEnoughToLatestToElect(hbData.getOpTime(), lastOpApplied)) {
result |= NotCloseEnoughToLatestOptime;
diff --git a/src/mongo/db/repl/topology_coordinator_impl_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
index f0d22e4b7cc..fe97bbf050e 100644
--- a/src/mongo/db/repl/topology_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
@@ -1328,9 +1328,9 @@ namespace {
}
TEST_F(HeartbeatResponseTestOneRetry, DecideToStepDownRemotePrimary) {
- // Confirm that action responses can come back from retries; in this, expect a StepDownSelf
- // action.
-
+ // Confirm that action responses can come back from retries; in this, expect a
+ // StepDownRemotePrimary action.
+
// make self primary
ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
makeSelfPrimary(OpTime(5,0));
@@ -1358,7 +1358,7 @@ namespace {
TEST_F(HeartbeatResponseTestOneRetry, DecideToStepDownSelf) {
// Confirm that action responses can come back from retries; in this, expect a StepDownSelf
// action.
-
+
// acknowledge the other member so that we see a majority
HeartbeatResponseAction action = receiveDownHeartbeat(HostAndPort("host3"),
"rs0",
@@ -1450,9 +1450,9 @@ namespace {
}
TEST_F(HeartbeatResponseTestTwoRetries, DecideToStepDownRemotePrimary) {
- // Confirm that action responses can come back from retries; in this, expect a StepDownSelf
- // action.
-
+ // Confirm that action responses can come back from retries; in this, expect a
+ // StepDownRemotePrimary action.
+
// make self primary
ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
makeSelfPrimary(OpTime(5,0));
@@ -1480,7 +1480,7 @@ namespace {
TEST_F(HeartbeatResponseTestTwoRetries, DecideToStepDownSelf) {
// Confirm that action responses can come back from retries; in this, expect a StepDownSelf
// action.
-
+
// acknowledge the other member so that we see a majority
HeartbeatResponseAction action = receiveDownHeartbeat(HostAndPort("host3"),
"rs0",
@@ -1513,7 +1513,7 @@ namespace {
TEST_F(HeartbeatResponseTestTwoRetries, DecideToStartElection) {
// Confirm that action responses can come back from retries; in this, expect a StartElection
// action.
-
+
// acknowledge the other member so that we see a majority
OpTime election = OpTime(400,0);
OpTime lastOpTimeApplied = OpTime(300,0);
@@ -1687,8 +1687,8 @@ namespace {
TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataStepDownPrimaryForHighPriorityFreshNode) {
// In this test, the Topology coordinator sees a PRIMARY ("host2") and then sees a higher
- // priority and similarly fresh node ("host3"). As a result it responds with a
- // StepDownRemote action for the PRIMARY ("host2").
+ // priority and similarly fresh node ("host3"). However, since the coordinator's node
+ // (host1) is not the higher priority node, it takes no action.
updateConfig(BSON("_id" << "rs0" <<
"version" << 6 <<
"members" << BSON_ARRAY(
@@ -1718,13 +1718,15 @@ namespace {
election,
slightlyLessFreshLastOpTimeApplied,
lastOpTimeApplied);
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, nextAction.getAction());
- ASSERT_EQUALS(1, nextAction.getPrimaryConfigIndex());
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, nextAction.getAction());
}
TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataStepDownSelfForHighPriorityFreshNode) {
// In this test, the Topology coordinator becomes PRIMARY and then sees a higher priority
// and equally fresh node ("host3"). As a result it responds with a StepDownSelf action.
+ //
+ // Despite having stepped down, we should remain electable, in order to dissuade lower
+ // priority nodes from standing for election.
updateConfig(BSON("_id" << "rs0" <<
"version" << 6 <<
"members" << BSON_ARRAY(
@@ -1735,6 +1737,7 @@ namespace {
0);
OpTime election = OpTime(1000,0);
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
makeSelfPrimary(election);
ASSERT_EQUALS(0, getCurrentPrimaryIndex());
@@ -1747,6 +1750,23 @@ namespace {
election);
ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction());
ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex());
+
+ // Process a heartbeat response to confirm that this node, which is no longer primary,
+ // still tells other nodes that it is electable. This will stop lower priority nodes
+ // from standing for election.
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setSetName("rs0");
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setConfigVersion(6);
+ hbArgs.setSenderId(1);
+ hbArgs.setSenderHost(HostAndPort("host3", 27017));
+ ReplSetHeartbeatResponse hbResp;
+ ASSERT_OK(getTopoCoord().prepareHeartbeatResponse(now(),
+ hbArgs,
+ "rs0",
+ election,
+ &hbResp));
+ ASSERT(!hbResp.hasIsElectable() || hbResp.isElectable()) << hbResp.toBSON().toString();
}
TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataDoNotStepDownSelfForHighPriorityStaleNode) {