summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSpencer T Brody <spencer@mongodb.com>2018-08-28 13:29:13 -0400
committerSpencer T Brody <spencer@mongodb.com>2018-08-30 17:08:03 -0400
commit0db56f8e29330894a7b24ee2b3f68f8f79e14848 (patch)
tree53ad5f2071ec08422901a5c245a3cd32e6fb082b
parent98fba86b404ddddb8d66f9751817e11a3d732ccd (diff)
downloadmongo-0db56f8e29330894a7b24ee2b3f68f8f79e14848.tar.gz
SERVER-36746 Don't erroneously skip drain mode if a stepdown attempt fails.
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp9
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_test.cpp61
-rw-r--r--src/mongo/db/repl/topology_coordinator.cpp18
-rw-r--r--src/mongo/db/repl/topology_coordinator.h82
-rw-r--r--src/mongo/db/repl/topology_coordinator_v1_test.cpp18
5 files changed, 124 insertions, 64 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index a366c46a62f..b44c58039cd 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1699,13 +1699,14 @@ Status ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
const long long termAtStart = _topCoord->getTerm();
- status = _topCoord->prepareForStepDownAttempt();
- if (!status.isOK()) {
+ auto statusWithAbortFn = _topCoord->prepareForStepDownAttempt();
+ if (!statusWithAbortFn.isOK()) {
// This will cause us to fail if we're already in the process of stepping down.
// It is also possible to get here even if we're done stepping down via another path,
// and this will also elicit a failure from this call.
- return status;
+ return statusWithAbortFn.getStatus();
}
+ const auto& abortFn = statusWithAbortFn.getValue();
// Update _canAcceptNonLocalWrites from the TopologyCoordinator now that we're in the middle
// of a stepdown attempt. This will prevent us from accepting writes so that if our stepdown
@@ -1741,7 +1742,7 @@ Status ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
_performPostMemberStateUpdateAction(action);
};
ScopeGuard onExitGuard = MakeGuard([&] {
- _topCoord->abortAttemptedStepDownIfNeeded();
+ abortFn();
updateMemberState();
});
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index 1f12e006783..24202d1de02 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -1668,6 +1668,67 @@ TEST_F(StepDownTest, StepDownCanCompleteBasedOnReplSetUpdatePositionAlone) {
ASSERT_TRUE(repl->getMemberState().secondary());
}
+TEST_F(StepDownTest, StepDownFailureRestoresDrainState) {
+ const auto repl = getReplCoord();
+
+ OpTimeWithTermOne opTime1(100, 1);
+ OpTimeWithTermOne opTime2(200, 1);
+
+ repl->setMyLastAppliedOpTime(opTime2);
+ repl->setMyLastDurableOpTime(opTime2);
+
+ // Secondaries not caught up yet.
+ ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 1, opTime1));
+ ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 2, opTime1));
+
+ auto electionTimeoutWhen = getReplCoord()->getElectionTimeout_forTest();
+ simulateSuccessfulV1ElectionWithoutExitingDrainMode(electionTimeoutWhen);
+ ASSERT_TRUE(repl->getMemberState().primary());
+ ASSERT(repl->getApplierState() == ReplicationCoordinator::ApplierState::Draining);
+
+ {
+ // We can't take writes yet since we're still in drain mode.
+ const auto opCtx = makeOperationContext();
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
+ ASSERT_FALSE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "admin"));
+ }
+
+ // Step down where the secondary actually has to catch up before the stepDown can succeed.
+ auto result = stepDown_nonBlocking(false, Seconds(10), Seconds(60));
+
+ // Interrupt the ongoing stepdown command so that the stepdown attempt will fail.
+ {
+ stdx::lock_guard<Client> lk(*result.first.client.get());
+ result.first.opCtx->markKilled(ErrorCodes::Interrupted);
+ }
+
+ // Ensure that the stepdown command failed.
+ auto stepDownStatus = *result.second.get();
+ ASSERT_NOT_OK(stepDownStatus);
+ // Which code is returned is racy.
+ ASSERT(stepDownStatus == ErrorCodes::PrimarySteppedDown ||
+ stepDownStatus == ErrorCodes::Interrupted);
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+ ASSERT(repl->getApplierState() == ReplicationCoordinator::ApplierState::Draining);
+
+ // Ensure that the failed stepdown attempt didn't make us able to take writes since we're still
+ // in drain mode.
+ {
+ const auto opCtx = makeOperationContext();
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
+ ASSERT_FALSE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "admin"));
+ }
+
+ // Now complete drain mode and ensure that we become capable of taking writes.
+ auto opCtx = makeOperationContext();
+ signalDrainComplete(opCtx.get());
+ ASSERT(repl->getApplierState() == ReplicationCoordinator::ApplierState::Stopped);
+
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
+ ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "admin"));
+}
+
class StepDownTestWithUnelectableNode : public StepDownTest {
private:
void setUp() override {
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index a298f5e0ecc..95c181bec38 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -1284,7 +1284,8 @@ bool TopologyCoordinator::prepareForUnconditionalStepDown() {
return true;
}
-Status TopologyCoordinator::prepareForStepDownAttempt() {
+StatusWith<TopologyCoordinator::StepDownAttemptAbortFn>
+TopologyCoordinator::prepareForStepDownAttempt() {
if (_leaderMode == LeaderMode::kSteppingDown ||
_leaderMode == LeaderMode::kAttemptingStepDown) {
return Status{ErrorCodes::ConflictingOperationInProgress,
@@ -1295,14 +1296,15 @@ Status TopologyCoordinator::prepareForStepDownAttempt() {
return Status{ErrorCodes::NotMaster, "This node is not a primary."};
}
+ invariant(_leaderMode == LeaderMode::kMaster || _leaderMode == LeaderMode::kLeaderElect);
+ const auto previousLeaderMode = _leaderMode;
_setLeaderMode(LeaderMode::kAttemptingStepDown);
- return Status::OK();
-}
-void TopologyCoordinator::abortAttemptedStepDownIfNeeded() {
- if (_leaderMode == TopologyCoordinator::LeaderMode::kAttemptingStepDown) {
- _setLeaderMode(TopologyCoordinator::LeaderMode::kMaster);
- }
+ return {[this, previousLeaderMode] {
+ if (_leaderMode == TopologyCoordinator::LeaderMode::kAttemptingStepDown) {
+ _setLeaderMode(previousLeaderMode);
+ }
+ }};
}
void TopologyCoordinator::changeMemberState_forTest(const MemberState& newMemberState,
@@ -2067,7 +2069,7 @@ void TopologyCoordinator::_setLeaderMode(TopologyCoordinator::LeaderMode newMode
break;
case LeaderMode::kAttemptingStepDown:
invariant(newMode == LeaderMode::kNotLeader || newMode == LeaderMode::kMaster ||
- newMode == LeaderMode::kSteppingDown);
+ newMode == LeaderMode::kSteppingDown || newMode == LeaderMode::kLeaderElect);
break;
case LeaderMode::kSteppingDown:
invariant(newMode == LeaderMode::kNotLeader);
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index 117bc784f75..bfd272e3f5c 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -93,40 +93,6 @@ public:
~TopologyCoordinator();
- /**
- * Different modes a node can be in while still reporting itself as in state PRIMARY.
- *
- * Valid transitions:
- *
- * kNotLeader <----------------------------------
- * | |
- * | |
- * | |
- * v |
- * kLeaderElect----- |
- * | | |
- * | | |
- * v | |
- * kMaster ------------------------- |
- * | ^ | | |
- * | | ------------------- | |
- * | | | | | |
- * v | v v v |
- * kAttemptingStepDown----------->kSteppingDown |
- * | | |
- * | | |
- * | | |
- * ---------------------------------------------
- *
- */
- enum class LeaderMode {
- kNotLeader, // This node is not currently a leader.
- kLeaderElect, // This node has been elected leader, but can't yet accept writes.
- kMaster, // This node reports ismaster:true and can accept writes.
- kSteppingDown, // This node is in the middle of a (hb) stepdown that must complete.
- kAttemptingStepDown, // This node is in the middle of a stepdown (cmd) that might fail.
- };
-
////////////////////////////////////////////////////////////
//
// State inspection methods.
@@ -537,6 +503,8 @@ public:
*/
void processLoseElection();
+
+ using StepDownAttemptAbortFn = stdx::function<void()>;
/**
* Readies the TopologyCoordinator for an attempt to stepdown that may fail. This is used
* when we receive a stepdown command (which can fail if not enough secondaries are caught up)
@@ -545,16 +513,10 @@ public:
* - NotMaster if this node is not a leader.
* - ConflictingOperationInProgess if this node is already processing a stepdown request of any
* kind.
+ * On an OK return status also returns a function object that can be called to abort the
+ * pending stepdown attempt and return this node to normal primary/master state.
*/
- Status prepareForStepDownAttempt();
-
- /**
- * If this node is still attempting to process a stepdown attempt, aborts the attempt and
- * returns this node to normal primary/master state. If this node has already completed
- * stepping down or is now in the process of handling an unconditional stepdown, then this
- * method does nothing.
- */
- void abortAttemptedStepDownIfNeeded();
+ StatusWith<StepDownAttemptAbortFn> prepareForStepDownAttempt();
/**
* Tries to transition the coordinator from the leader role to the follower role.
@@ -731,6 +693,40 @@ private:
typedef int UnelectableReasonMask;
class PingStats;
+ /**
+ * Different modes a node can be in while still reporting itself as in state PRIMARY.
+ *
+ * Valid transitions:
+ *
+ * kNotLeader <----------------------------------
+ * | |
+ * | |
+ * | |
+ * v |
+ * kLeaderElect----------------- |
+ * | ^ | | |
+ * | | | | |
+ * v | | | |
+ * kMaster -------------------------- |
+ * | ^ | | | | |
+ * | | | | | | |
+ * | | | | | | |
+ * v | | v v v |
+ * kAttemptingStepDown----------->kSteppingDown |
+ * | | |
+ * | | |
+ * | | |
+ * ---------------------------------------------
+ *
+ */
+ enum class LeaderMode {
+ kNotLeader, // This node is not currently a leader.
+ kLeaderElect, // This node has been elected leader, but can't yet accept writes.
+ kMaster, // This node reports ismaster:true and can accept writes.
+ kSteppingDown, // This node is in the middle of a (hb) stepdown that must complete.
+ kAttemptingStepDown, // This node is in the middle of a stepdown (cmd) that might fail.
+ };
+
enum UnelectableReason {
None = 0,
CannotSeeMajority = 1 << 0,
diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
index be14cb543bb..6e4e7efeed1 100644
--- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
@@ -1917,7 +1917,7 @@ TEST_F(TopoCoordTest, PrepareStepDownAttemptFailsIfNotLeader) {
getTopoCoord().changeMemberState_forTest(MemberState::RS_SECONDARY);
Status expectedStatus(ErrorCodes::NotMaster, "This node is not a primary. ");
- ASSERT_EQUALS(expectedStatus, getTopoCoord().prepareForStepDownAttempt());
+ ASSERT_EQUALS(expectedStatus, getTopoCoord().prepareForStepDownAttempt().getStatus());
}
class PrepareHeartbeatResponseV1Test : public TopoCoordTest {
@@ -4264,7 +4264,7 @@ TEST_F(TopoCoordTest, StepDownAttemptFailsForDifferentTerm) {
Date_t futureTime = curTime + Seconds(1);
makeSelfPrimary();
- ASSERT_OK(getTopoCoord().prepareForStepDownAttempt());
+ ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus());
ASSERT_THROWS_CODE(
getTopoCoord().attemptStepDown(term - 1, curTime, futureTime, futureTime, false),
@@ -4294,7 +4294,7 @@ TEST_F(TopoCoordTest, StepDownAttemptFailsIfPastStepDownUntil) {
Date_t futureTime = curTime + Seconds(1);
makeSelfPrimary();
- ASSERT_OK(getTopoCoord().prepareForStepDownAttempt());
+ ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus());
ASSERT_THROWS_CODE_AND_WHAT(
getTopoCoord().attemptStepDown(term, curTime, futureTime, curTime, false),
@@ -4326,7 +4326,7 @@ TEST_F(TopoCoordTest, StepDownAttemptFailsIfPastWaitUntil) {
Date_t futureTime = curTime + Seconds(1);
makeSelfPrimary();
- ASSERT_OK(getTopoCoord().prepareForStepDownAttempt());
+ ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus());
std::string expectedWhat = str::stream()
<< "No electable secondaries caught up as of " << dateToISOStringLocal(curTime)
@@ -4362,7 +4362,7 @@ TEST_F(TopoCoordTest, StepDownAttemptFailsIfNoSecondariesCaughtUp) {
makeSelfPrimary();
setMyOpTime(OpTime(Timestamp(5, 0), term));
- ASSERT_OK(getTopoCoord().prepareForStepDownAttempt());
+ ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus());
heartbeatFromMember(
HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(4, 0), term));
@@ -4395,7 +4395,7 @@ TEST_F(TopoCoordTest, StepDownAttemptFailsIfNoSecondariesCaughtUpForceIsTrueButN
makeSelfPrimary();
setMyOpTime(OpTime(Timestamp(5, 0), term));
- ASSERT_OK(getTopoCoord().prepareForStepDownAttempt());
+ ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus());
heartbeatFromMember(
HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(4, 0), term));
@@ -4428,7 +4428,7 @@ TEST_F(TopoCoordTest, StepDownAttemptSucceedsIfNoSecondariesCaughtUpForceIsTrueA
makeSelfPrimary();
setMyOpTime(OpTime(Timestamp(5, 0), term));
- ASSERT_OK(getTopoCoord().prepareForStepDownAttempt());
+ ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus());
heartbeatFromMember(
HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(4, 0), term));
@@ -4461,7 +4461,7 @@ TEST_F(TopoCoordTest, StepDownAttemptSucceedsIfSecondariesCaughtUp) {
makeSelfPrimary();
setMyOpTime(OpTime(Timestamp(5, 0), term));
- ASSERT_OK(getTopoCoord().prepareForStepDownAttempt());
+ ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus());
heartbeatFromMember(
HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(5, 0), term));
@@ -4498,7 +4498,7 @@ TEST_F(TopoCoordTest, StepDownAttemptFailsIfSecondaryCaughtUpButNotElectable) {
makeSelfPrimary();
setMyOpTime(OpTime(Timestamp(5, 0), term));
- ASSERT_OK(getTopoCoord().prepareForStepDownAttempt());
+ ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus());
heartbeatFromMember(
HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(5, 0), term));