summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Benety Goh <benety@mongodb.com> 2017-06-16 15:35:06 -0400
committer Benety Goh <benety@mongodb.com> 2017-07-12 22:44:33 -0400
commit ee6d550e81773fafd2a981b100ab520b73970c5e (patch)
tree ff99f40f5c38594ef3d4909fb3c3103cdb2e3b27
parent a889b0d79a17eeed1f548a227e13ad553d1b32a2 (diff)
download mongo-ee6d550e81773fafd2a981b100ab520b73970c5e.tar.gz
SERVER-29015 TopologyCoordinator should not transition to candidate role in a single node replica set if we are in maintenance mode
(cherry picked from commit 5dd64f88d2b66078c957eea5a7889076ee5956b6)
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_test.cpp 39
-rw-r--r-- src/mongo/db/repl/topology_coordinator_impl.cpp 30
-rw-r--r-- src/mongo/db/repl/topology_coordinator_impl.h 9
-rw-r--r-- src/mongo/db/repl/topology_coordinator_impl_test.cpp 28
-rw-r--r-- src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp 65
5 files changed, 157 insertions, 14 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index 6ee1b399cdd..e586f1f0e9e 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -1815,6 +1815,45 @@ TEST_F(ReplCoordTest, NodeBecomesPrimaryAgainWhenStepDownTimeoutExpiresInASingle
ASSERT_TRUE(getReplCoord()->getMemberState().primary());
}
+TEST_F(
+ ReplCoordTest,
+ NodeGoesIntoRecoveryAgainWhenStepDownTimeoutExpiresInASingleNodeSetAndWeAreInMaintenanceMode) {
+ init("mySet");
+
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version"
+ << 1
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234"))),
+ HostAndPort("test1", 1234));
+ runSingleNodeElection(makeOperationContext(), getReplCoord(), getNet());
+ const auto opCtx = makeOperationContext();
+
+ ASSERT_OK(getReplCoord()->stepDown(opCtx.get(), true, Milliseconds(0), Milliseconds(1000)));
+ getNet()->enterNetwork(); // Must do this before inspecting the topocoord
+ Date_t stepdownUntil = getNet()->now() + Seconds(1);
+ ASSERT_EQUALS(stepdownUntil, getTopoCoord().getStepDownTime());
+ ASSERT_TRUE(getTopoCoord().getMemberState().secondary());
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+
+ // Go into maintenance mode.
+ ASSERT_EQUALS(0, getTopoCoord().getMaintenanceCount());
+ ASSERT_FALSE(getReplCoord()->getMaintenanceMode());
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
+ ASSERT_EQUALS(1, getTopoCoord().getMaintenanceCount());
+ ASSERT_TRUE(getReplCoord()->getMaintenanceMode());
+
+ // Now run time forward and make sure that the node goes into RECOVERING again when the stepdown
+ // period ends.
+ getNet()->runUntil(stepdownUntil);
+ ASSERT_EQUALS(stepdownUntil, getNet()->now());
+ ASSERT_EQUALS(MemberState(MemberState::RS_RECOVERING), getTopoCoord().getMemberState());
+ getNet()->exitNetwork();
+ ASSERT_EQUALS(MemberState(MemberState::RS_RECOVERING), getReplCoord()->getMemberState());
+}
+
TEST_F(StepDownTest,
NodeReturnsExceededTimeLimitWhenNoSecondaryIsCaughtUpWithinStepDownsSecondaryCatchUpPeriod) {
OpTimeWithTermOne optime1(100, 1);
diff --git a/src/mongo/db/repl/topology_coordinator_impl.cpp b/src/mongo/db/repl/topology_coordinator_impl.cpp
index 75231166588..88337b6a500 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl.cpp
@@ -1852,11 +1852,10 @@ TopologyCoordinatorImpl::prepareFreezeResponse(Date_t now, int secs, BSONObjBuil
log() << "'unfreezing'";
response->append("info", "unfreezing");
- if (_followerMode == MemberState::RS_SECONDARY && _rsConfig.getNumMembers() == 1 &&
- _selfIndex == 0 && _rsConfig.getMemberAt(_selfIndex).isElectable()) {
+ if (_isElectableNodeInSingleNodeReplicaSet()) {
// If we are a one-node replica set, we're the one member,
- // we're electable, and we are currently in followerMode SECONDARY,
- // we must transition to candidate now that our stepdown period
+ // we're electable, we're not in maintenance mode, and we are currently in followerMode
+ // SECONDARY, we must transition to candidate now that our stepdown period
// is no longer active, in leiu of heartbeats.
_role = Role::candidate;
return PrepareFreezeResponseResult::kElectSelf;
@@ -1877,11 +1876,10 @@ bool TopologyCoordinatorImpl::becomeCandidateIfStepdownPeriodOverAndSingleNodeSe
return false;
}
- if (_followerMode == MemberState::RS_SECONDARY && _rsConfig.getNumMembers() == 1 &&
- _selfIndex == 0 && _rsConfig.getMemberAt(_selfIndex).isElectable()) {
+ if (_isElectableNodeInSingleNodeReplicaSet()) {
// If the new config describes a one-node replica set, we're the one member,
- // we're electable, and we are currently in followerMode SECONDARY,
- // we must transition to candidate, in leiu of heartbeats.
+ // we're electable, we're not in maintenance mode, and we are currently in followerMode
+ // SECONDARY, we must transition to candidate, in lieu of heartbeats.
_role = Role::candidate;
return true;
}
@@ -1984,11 +1982,10 @@ void TopologyCoordinatorImpl::updateConfig(const ReplSetConfig& newConfig,
// By this point we know we are in Role::follower
_currentPrimaryIndex = -1; // force secondaries to re-detect who the primary is
- if (_followerMode == MemberState::RS_SECONDARY && _rsConfig.getNumMembers() == 1 &&
- _selfIndex == 0 && _rsConfig.getMemberAt(_selfIndex).isElectable()) {
+ if (_isElectableNodeInSingleNodeReplicaSet()) {
// If the new config describes a one-node replica set, we're the one member,
- // we're electable, and we are currently in followerMode SECONDARY,
- // we must transition to candidate, in leiu of heartbeats.
+ // we're electable, we're not in maintenance mode and we are currently in followerMode
+ // SECONDARY, we must transition to candidate, in lieu of heartbeats.
_role = Role::candidate;
}
}
@@ -2341,12 +2338,17 @@ void TopologyCoordinatorImpl::setFollowerMode(MemberState::MS newMode) {
// be a candidate here. This is necessary because a single node replica set has no
// heartbeats that would normally change the role to candidate.
- if (_rsConfig.getNumMembers() == 1 && _selfIndex == 0 &&
- _rsConfig.getMemberAt(_selfIndex).isElectable()) {
+ if (_isElectableNodeInSingleNodeReplicaSet()) {
_role = Role::candidate;
}
}
+bool TopologyCoordinatorImpl::_isElectableNodeInSingleNodeReplicaSet() const {
+ return _followerMode == MemberState::RS_SECONDARY && _rsConfig.getNumMembers() == 1 &&
+ _selfIndex == 0 && _rsConfig.getMemberAt(_selfIndex).isElectable() &&
+ _maintenanceModeCalls == 0;
+}
+
bool TopologyCoordinatorImpl::stepDownIfPending() {
if (!_stepDownPending) {
return false;
diff --git a/src/mongo/db/repl/topology_coordinator_impl.h b/src/mongo/db/repl/topology_coordinator_impl.h
index b30d16b4c60..d4b0476cde1 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.h
+++ b/src/mongo/db/repl/topology_coordinator_impl.h
@@ -384,6 +384,15 @@ private:
**/
bool _memberIsBlacklisted(const MemberConfig& memberConfig, Date_t now) const;
+ /**
+ * Returns true if we are a one-node replica set, we're the one member,
+ * we're electable, we're not in maintenance mode, and we are currently in followerMode
+ * SECONDARY.
+ *
+ * This is used to decide if we should transition to Role::candidate in a one-node replica set.
+ */
+ bool _isElectableNodeInSingleNodeReplicaSet() const;
+
// This node's role in the replication protocol.
Role _role;
diff --git a/src/mongo/db/repl/topology_coordinator_impl_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
index bd617b2dfa6..e3708a9bf54 100644
--- a/src/mongo/db/repl/topology_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
@@ -4617,6 +4617,34 @@ TEST_F(TopoCoordTest,
"cannot freeze node when primary or running for election. state: Running-Election");
}
+TEST_F(TopoCoordTest, DoNotBecomeCandidateOnUnfreezingInMaintenanceMode) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version"
+ << 5
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ BSONObjBuilder response;
+ ASSERT_EQUALS(
+ TopologyCoordinator::PrepareFreezeResponseResult::kNoAction,
+ unittest::assertGet(getTopoCoord().prepareFreezeResponse(now()++, 20, &response)));
+ ASSERT(response.obj().isEmpty());
+ BSONObjBuilder response2;
+
+ // We should not transition to Role::candidate if we are in maintenance upon unfreezing.
+ getTopoCoord().adjustMaintenanceCountBy(1);
+
+ ASSERT_EQUALS(
+ TopologyCoordinator::PrepareFreezeResponseResult::kNoAction,
+ unittest::assertGet(getTopoCoord().prepareFreezeResponse(now()++, 0, &response2)));
+ ASSERT_EQUALS("unfreezing", response2.obj()["info"].String());
+ ASSERT(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
class PrepareHeartbeatResponseTest : public TopoCoordTest {
public:
virtual void setUp() {
diff --git a/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp
index 72e81caba7f..8062208949b 100644
--- a/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp
@@ -2073,6 +2073,35 @@ TEST_F(TopoCoordTest, BecomeCandidateWhenBecomingSecondaryInSingleNodeSet) {
ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
}
+TEST_F(TopoCoordTest, DoNotBecomeCandidateWhenBecomingSecondaryInSingleNodeSetIfInMaintenanceMode) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version"
+ << 1
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "hself"))),
+ 0);
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+
+ // If we are the only node and we are in maintenance mode, we should not become a candidate when
+ // we transition to SECONDARY.
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ getTopoCoord().adjustMaintenanceCountBy(1);
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+ // getMemberState() returns RS_RECOVERING while we are in maintenance mode even though
+ // _memberState is set to RS_SECONDARY.
+ ASSERT_EQUALS(MemberState::RS_RECOVERING, getTopoCoord().getMemberState().s);
+
+ // Once we are no longer in maintenance mode, getMemberState() should return RS_SECONDARY.
+ getTopoCoord().adjustMaintenanceCountBy(-1);
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+}
+
TEST_F(TopoCoordTest, BecomeCandidateWhenReconfigToBeElectableInSingleNodeSet) {
ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
@@ -2107,6 +2136,42 @@ TEST_F(TopoCoordTest, BecomeCandidateWhenReconfigToBeElectableInSingleNodeSet) {
ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
}
+TEST_F(TopoCoordTest,
+ DoNotBecomeCandidateWhenReconfigToBeElectableInSingleNodeSetIfInMaintenanceMode) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ ReplSetConfig cfg;
+ ASSERT_OK(cfg.initialize(BSON("_id"
+ << "rs0"
+ << "version"
+ << 1
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "hself"
+ << "priority"
+ << 0)))));
+ getTopoCoord().updateConfig(cfg, 0, now()++, OpTime());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+
+ // We should not become a candidate when we reconfig to become electable if we are currently in
+ // maintenance mode.
+ getTopoCoord().adjustMaintenanceCountBy(1);
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version"
+ << 1
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "hself"))),
+ 0);
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+}
+
TEST_F(TopoCoordTest, NodeDoesNotBecomeCandidateWhenBecomingSecondaryInSingleNodeSetIfUnelectable) {
ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);