diff options
author | Spencer T Brody <spencer@mongodb.com> | 2018-08-28 13:29:13 -0400 |
---|---|---|
committer | Spencer T Brody <spencer@mongodb.com> | 2018-09-17 18:56:40 -0400 |
commit | 03fbfd263535c5a2ebe6cf0d0dce3c50195fc43d (patch) | |
tree | 0c07978a774551f6050b573dd14be3560b58f5bd /src/mongo | |
parent | e2d6958949c7d0ae4808fb760005cafaffc942e7 (diff) | |
download | mongo-03fbfd263535c5a2ebe6cf0d0dce3c50195fc43d.tar.gz |
SERVER-36746 Don't erroneously skip drain mode if a stepdown attempt fails.
(cherry picked from commit 0db56f8e29330894a7b24ee2b3f68f8f79e14848)
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_test.cpp | 61 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator.cpp | 18 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator.h | 82 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator_v1_test.cpp | 18 |
5 files changed, 124 insertions, 64 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index e52f5f3a34a..1f126599847 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -1671,13 +1671,14 @@ Status ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx, const long long termAtStart = _topCoord->getTerm(); - status = _topCoord->prepareForStepDownAttempt(); - if (!status.isOK()) { + auto statusWithAbortFn = _topCoord->prepareForStepDownAttempt(); + if (!statusWithAbortFn.isOK()) { // This will cause us to fail if we're already in the process of stepping down. // It is also possible to get here even if we're done stepping down via another path, // and this will also elicit a failure from this call. - return status; + return statusWithAbortFn.getStatus(); } + const auto& abortFn = statusWithAbortFn.getValue(); // Update _canAcceptNonLocalWrites from the TopologyCoordinator now that we're in the middle // of a stepdown attempt. This will prevent us from accepting writes so that if our stepdown @@ -1713,7 +1714,7 @@ Status ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx, _performPostMemberStateUpdateAction(action); }; ScopeGuard onExitGuard = MakeGuard([&] { - _topCoord->abortAttemptedStepDownIfNeeded(); + abortFn(); updateMemberState(); }); diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index 95a16e5572e..1f364f4b555 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -1669,6 +1669,67 @@ TEST_F(StepDownTest, StepDownCanCompleteBasedOnReplSetUpdatePositionAlone) { ASSERT_TRUE(repl->getMemberState().secondary()); } +TEST_F(StepDownTest, StepDownFailureRestoresDrainState) { + const auto repl = getReplCoord(); + + OpTimeWithTermOne opTime1(100, 1); + OpTimeWithTermOne opTime2(200, 1); + + repl->setMyLastAppliedOpTime(opTime2); + repl->setMyLastDurableOpTime(opTime2); + + // Secondaries not caught up yet. + ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 1, opTime1)); + ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 2, opTime1)); + + auto electionTimeoutWhen = getReplCoord()->getElectionTimeout_forTest(); + simulateSuccessfulV1ElectionWithoutExitingDrainMode(electionTimeoutWhen); + ASSERT_TRUE(repl->getMemberState().primary()); + ASSERT(repl->getApplierState() == ReplicationCoordinator::ApplierState::Draining); + + { + // We can't take writes yet since we're still in drain mode. + const auto opCtx = makeOperationContext(); + Lock::GlobalLock lock(opCtx.get(), MODE_IX); + ASSERT_FALSE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "admin")); + } + + // Step down where the secondary actually has to catch up before the stepDown can succeed. + auto result = stepDown_nonBlocking(false, Seconds(10), Seconds(60)); + + // Interrupt the ongoing stepdown command so that the stepdown attempt will fail. + { + stdx::lock_guard<Client> lk(*result.first.client.get()); + result.first.opCtx->markKilled(ErrorCodes::Interrupted); + } + + // Ensure that the stepdown command failed. + auto stepDownStatus = *result.second.get(); + ASSERT_NOT_OK(stepDownStatus); + // Which code is returned is racy. + ASSERT(stepDownStatus == ErrorCodes::PrimarySteppedDown || + stepDownStatus == ErrorCodes::Interrupted); + ASSERT_TRUE(getReplCoord()->getMemberState().primary()); + ASSERT(repl->getApplierState() == ReplicationCoordinator::ApplierState::Draining); + + // Ensure that the failed stepdown attempt didn't make us able to take writes since we're still + // in drain mode. + { + const auto opCtx = makeOperationContext(); + Lock::GlobalLock lock(opCtx.get(), MODE_IX); + ASSERT_FALSE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "admin")); + } + + // Now complete drain mode and ensure that we become capable of taking writes. + auto opCtx = makeOperationContext(); + signalDrainComplete(opCtx.get()); + ASSERT(repl->getApplierState() == ReplicationCoordinator::ApplierState::Stopped); + + ASSERT_TRUE(getReplCoord()->getMemberState().primary()); + Lock::GlobalLock lock(opCtx.get(), MODE_IX); + ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "admin")); +} + class StepDownTestWithUnelectableNode : public StepDownTest { private: void setUp() override { diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp index f5e7fdfbe5d..2d06e431162 100644 --- a/src/mongo/db/repl/topology_coordinator.cpp +++ b/src/mongo/db/repl/topology_coordinator.cpp @@ -1898,7 +1898,8 @@ bool TopologyCoordinator::prepareForUnconditionalStepDown() { return true; } -Status TopologyCoordinator::prepareForStepDownAttempt() { +StatusWith<TopologyCoordinator::StepDownAttemptAbortFn> +TopologyCoordinator::prepareForStepDownAttempt() { if (_leaderMode == LeaderMode::kSteppingDown || _leaderMode == LeaderMode::kAttemptingStepDown) { return Status{ErrorCodes::ConflictingOperationInProgress, @@ -1909,14 +1910,15 @@ Status TopologyCoordinator::prepareForStepDownAttempt() { return Status{ErrorCodes::NotMaster, "This node is not a primary."}; } + invariant(_leaderMode == LeaderMode::kMaster || _leaderMode == LeaderMode::kLeaderElect); + const auto previousLeaderMode = _leaderMode; _setLeaderMode(LeaderMode::kAttemptingStepDown); - return Status::OK(); -} -void TopologyCoordinator::abortAttemptedStepDownIfNeeded() { - if (_leaderMode == TopologyCoordinator::LeaderMode::kAttemptingStepDown) { - _setLeaderMode(TopologyCoordinator::LeaderMode::kMaster); - } + return {[this, previousLeaderMode] { + if (_leaderMode == TopologyCoordinator::LeaderMode::kAttemptingStepDown) { + _setLeaderMode(previousLeaderMode); + } + }}; } void TopologyCoordinator::changeMemberState_forTest(const MemberState& newMemberState, @@ -2769,7 +2771,7 @@ void TopologyCoordinator::_setLeaderMode(TopologyCoordinator::LeaderMode newMode break; case LeaderMode::kAttemptingStepDown: invariant(newMode == LeaderMode::kNotLeader || newMode == LeaderMode::kMaster || - newMode == LeaderMode::kSteppingDown); + newMode == LeaderMode::kSteppingDown || newMode == LeaderMode::kLeaderElect); break; case LeaderMode::kSteppingDown: invariant(newMode == LeaderMode::kNotLeader); diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h index 4fda590d021..ace1c5cb4e1 100644 --- a/src/mongo/db/repl/topology_coordinator.h +++ b/src/mongo/db/repl/topology_coordinator.h @@ -94,40 +94,6 @@ public: ~TopologyCoordinator(); - /** - * Different modes a node can be in while still reporting itself as in state PRIMARY. - * - * Valid transitions: - * - * kNotLeader <---------------------------------- - * | | - * | | - * | | - * v | - * kLeaderElect----- | - * | | | - * | | | - * v | | - * kMaster ------------------------- | - * | ^ | | | - * | | ------------------- | | - * | | | | | | - * v | v v v | - * kAttemptingStepDown----------->kSteppingDown | - * | | | - * | | | - * | | | - * --------------------------------------------- - * - */ - enum class LeaderMode { - kNotLeader, // This node is not currently a leader. - kLeaderElect, // This node has been elected leader, but can't yet accept writes. - kMaster, // This node reports ismaster:true and can accept writes. - kSteppingDown, // This node is in the middle of a (hb) stepdown that must complete. - kAttemptingStepDown, // This node is in the middle of a stepdown (cmd) that might fail. - }; - //////////////////////////////////////////////////////////// // // State inspection methods. @@ -583,6 +549,8 @@ public: */ void processLoseElection(); + + using StepDownAttemptAbortFn = stdx::function<void()>; /** * Readies the TopologyCoordinator for an attempt to stepdown that may fail. This is used * when we receive a stepdown command (which can fail if not enough secondaries are caught up) @@ -591,16 +559,10 @@ public: * - NotMaster if this node is not a leader. * - ConflictingOperationInProgess if this node is already processing a stepdown request of any * kind. + * On an OK return status also returns a function object that can be called to abort the + * pending stepdown attempt and return this node to normal primary/master state. */ - Status prepareForStepDownAttempt(); - - /** - * If this node is still attempting to process a stepdown attempt, aborts the attempt and - * returns this node to normal primary/master state. If this node has already completed - * stepping down or is now in the process of handling an unconditional stepdown, then this - * method does nothing. - */ - void abortAttemptedStepDownIfNeeded(); + StatusWith<StepDownAttemptAbortFn> prepareForStepDownAttempt(); /** * Tries to transition the coordinator from the leader role to the follower role. @@ -777,6 +739,40 @@ private: typedef int UnelectableReasonMask; class PingStats; + /** + * Different modes a node can be in while still reporting itself as in state PRIMARY. + * + * Valid transitions: + * + * kNotLeader <---------------------------------- + * | | + * | | + * | | + * v | + * kLeaderElect----------------- | + * | ^ | | | + * | | | | | + * v | | | | + * kMaster -------------------------- | + * | ^ | | | | | + * | | | | | | | + * | | | | | | | + * v | | v v v | + * kAttemptingStepDown----------->kSteppingDown | + * | | | + * | | | + * | | | + * --------------------------------------------- + * + */ + enum class LeaderMode { + kNotLeader, // This node is not currently a leader. + kLeaderElect, // This node has been elected leader, but can't yet accept writes. + kMaster, // This node reports ismaster:true and can accept writes. + kSteppingDown, // This node is in the middle of a (hb) stepdown that must complete. + kAttemptingStepDown, // This node is in the middle of a stepdown (cmd) that might fail. + }; + enum UnelectableReason { None = 0, CannotSeeMajority = 1 << 0, diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp index 4ca986b47f7..8c5c8230c98 100644 --- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp @@ -1911,7 +1911,7 @@ TEST_F(TopoCoordTest, PrepareStepDownAttemptFailsIfNotLeader) { getTopoCoord().changeMemberState_forTest(MemberState::RS_SECONDARY); Status expectedStatus(ErrorCodes::NotMaster, "This node is not a primary. "); - ASSERT_EQUALS(expectedStatus, getTopoCoord().prepareForStepDownAttempt()); + ASSERT_EQUALS(expectedStatus, getTopoCoord().prepareForStepDownAttempt().getStatus()); } class PrepareHeartbeatResponseV1Test : public TopoCoordTest { @@ -4328,7 +4328,7 @@ TEST_F(TopoCoordTest, StepDownAttemptFailsForDifferentTerm) { Date_t futureTime = curTime + Seconds(1); makeSelfPrimary(); - ASSERT_OK(getTopoCoord().prepareForStepDownAttempt()); + ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus()); ASSERT_THROWS_CODE( getTopoCoord().attemptStepDown(term - 1, curTime, futureTime, futureTime, false), @@ -4358,7 +4358,7 @@ TEST_F(TopoCoordTest, StepDownAttemptFailsIfPastStepDownUntil) { Date_t futureTime = curTime + Seconds(1); makeSelfPrimary(); - ASSERT_OK(getTopoCoord().prepareForStepDownAttempt()); + ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus()); ASSERT_THROWS_CODE_AND_WHAT( getTopoCoord().attemptStepDown(term, curTime, futureTime, curTime, false), @@ -4390,7 +4390,7 @@ TEST_F(TopoCoordTest, StepDownAttemptFailsIfPastWaitUntil) { Date_t futureTime = curTime + Seconds(1); makeSelfPrimary(); - ASSERT_OK(getTopoCoord().prepareForStepDownAttempt()); + ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus()); std::string expectedWhat = str::stream() << "No electable secondaries caught up as of " << dateToISOStringLocal(curTime) @@ -4426,7 +4426,7 @@ TEST_F(TopoCoordTest, StepDownAttemptFailsIfNoSecondariesCaughtUp) { makeSelfPrimary(); setMyOpTime(OpTime(Timestamp(5, 0), term)); - ASSERT_OK(getTopoCoord().prepareForStepDownAttempt()); + ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus()); heartbeatFromMember( HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(4, 0), term)); @@ -4459,7 +4459,7 @@ TEST_F(TopoCoordTest, StepDownAttemptFailsIfNoSecondariesCaughtUpForceIsTrueButN makeSelfPrimary(); setMyOpTime(OpTime(Timestamp(5, 0), term)); - ASSERT_OK(getTopoCoord().prepareForStepDownAttempt()); + ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus()); heartbeatFromMember( HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(4, 0), term)); @@ -4492,7 +4492,7 @@ TEST_F(TopoCoordTest, StepDownAttemptSucceedsIfNoSecondariesCaughtUpForceIsTrueA makeSelfPrimary(); setMyOpTime(OpTime(Timestamp(5, 0), term)); - ASSERT_OK(getTopoCoord().prepareForStepDownAttempt()); + ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus()); heartbeatFromMember( HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(4, 0), term)); @@ -4525,7 +4525,7 @@ TEST_F(TopoCoordTest, StepDownAttemptSucceedsIfSecondariesCaughtUp) { makeSelfPrimary(); setMyOpTime(OpTime(Timestamp(5, 0), term)); - ASSERT_OK(getTopoCoord().prepareForStepDownAttempt()); + ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus()); heartbeatFromMember( HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(5, 0), term)); @@ -4562,7 +4562,7 @@ TEST_F(TopoCoordTest, StepDownAttemptFailsIfSecondaryCaughtUpButNotElectable) { makeSelfPrimary(); setMyOpTime(OpTime(Timestamp(5, 0), term)); - ASSERT_OK(getTopoCoord().prepareForStepDownAttempt()); + ASSERT_OK(getTopoCoord().prepareForStepDownAttempt().getStatus()); heartbeatFromMember( HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(5, 0), term)); |