diff options
| | | |
|---|---|---|
| author | XueruiFa <xuerui.fa@mongodb.com> | 2020-07-20 20:41:32 +0000 |
| committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-07-28 17:41:02 +0000 |
| commit | f11b0351c33d2888607eebd1748d524d241fc9ba (patch) | |
| tree | 2f85cf1f4242726edbe00739c5a5a55667c8b98e /src/mongo/db/repl | |
| parent | 26c5900914fda23a6c5ab85995afbab4eddd4e58 (diff) | |
| download | mongo-f11b0351c33d2888607eebd1748d524d241fc9ba.tar.gz | |
SERVER-48938: Allow primary-elect to complete drain mode even if it is stepping down unconditionally
Diffstat (limited to 'src/mongo/db/repl')
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_test.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator.h | 3 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator_v1_test.cpp | 15 |
5 files changed, 17 insertions, 28 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 52908aedbae..990e354a3a8 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -1223,15 +1223,7 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx, firstOpTime.getTerm()); lk.lock(); - auto status = _topCoord->completeTransitionToPrimary(firstOpTime); - if (status.code() == ErrorCodes::PrimarySteppedDown) { - LOGV2(21330, - "Transition to primary failed {error}", - "Transition to primary failed", - "error"_attr = causedBy(status)); - return; - } - invariant(status); + _topCoord->completeTransitionToPrimary(firstOpTime); invariant(firstOpTime.getTerm() == _topCoord->getTerm()); invariant(termWhenBufferIsEmpty == _topCoord->getTerm()); } diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index c2cffedff17..55e0b5c277d 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -1679,6 +1679,9 @@ TEST_F(ReplCoordTest, DrainCompletionMidStepDown) { ASSERT(updateTermEvh.isValid()); ASSERT(termUpdated == TopologyCoordinator::UpdateTermResult::kTriggerStepDown); + // Set 'firstOpTimeOfMyTerm' to have term 1, so that the node will see that the noop entry has + // the correct term at the end of signalDrainComplete. + getExternalState()->setFirstOpTimeOfMyTerm(OpTime(Timestamp(100, 1), 1)); // Now signal that replication applier is finished draining its buffer. 
getReplCoord()->signalDrainComplete(opCtx.get(), getReplCoord()->getTerm()); diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp index 8bc7cf36a2c..d32e3caa6ec 100644 --- a/src/mongo/db/repl/topology_coordinator.cpp +++ b/src/mongo/db/repl/topology_coordinator.cpp @@ -2886,24 +2886,22 @@ bool TopologyCoordinator::canCompleteTransitionToPrimary(long long termWhenDrain } // Allow completing the transition to primary even when in the middle of a stepdown attempt, // in case the stepdown attempt fails. - if (_leaderMode != LeaderMode::kLeaderElect && _leaderMode != LeaderMode::kAttemptingStepDown) { + if (_leaderMode != LeaderMode::kLeaderElect && _leaderMode != LeaderMode::kAttemptingStepDown && + _leaderMode != LeaderMode::kSteppingDown) { return false; } return true; } -Status TopologyCoordinator::completeTransitionToPrimary(const OpTime& firstOpTimeOfTerm) { - if (!canCompleteTransitionToPrimary(firstOpTimeOfTerm.getTerm())) { - return Status(ErrorCodes::PrimarySteppedDown, - "By the time this node was ready to complete its transition to PRIMARY it " - "was no longer eligible to do so"); - } +void TopologyCoordinator::completeTransitionToPrimary(const OpTime& firstOpTimeOfTerm) { + invariant(canCompleteTransitionToPrimary(firstOpTimeOfTerm.getTerm())); + if (_leaderMode == LeaderMode::kLeaderElect) { _setLeaderMode(LeaderMode::kMaster); } + _firstOpTimeOfMyTerm = firstOpTimeOfTerm; - return Status::OK(); } void TopologyCoordinator::adjustMaintenanceCountBy(int inc) { diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h index 3810c0e1010..b5f3f634568 100644 --- a/src/mongo/db/repl/topology_coordinator.h +++ b/src/mongo/db/repl/topology_coordinator.h @@ -328,9 +328,8 @@ public: * "firstOpTimeOfTerm" is a floor on the OpTimes this node will be allowed to consider committed * for this tenure as primary. 
This prevents entries from before our election from counting as * committed in our view, until our election (the "firstOpTimeOfTerm" op) has been committed. - * Returns PrimarySteppedDown if this node is no longer eligible to begin accepting writes. */ - Status completeTransitionToPrimary(const OpTime& firstOpTimeOfTerm); + void completeTransitionToPrimary(const OpTime& firstOpTimeOfTerm); /** * Adjusts the maintenance mode count by "inc". diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp index dba14d0f8c4..b81f7df01f1 100644 --- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp @@ -132,7 +132,7 @@ protected: getTopoCoord().setCurrentPrimary_forTest(_selfIndex, electionTimestamp); OpTime dummyOpTime(Timestamp(1, 1), getTopoCoord().getTerm()); setMyOpTime(dummyOpTime); - ASSERT_OK(getTopoCoord().completeTransitionToPrimary(dummyOpTime)); + getTopoCoord().completeTransitionToPrimary(dummyOpTime); } void setMyOpTime(const OpTime& opTime, Date_t wallTime = Date_t()) { @@ -6913,7 +6913,7 @@ TEST_F(HeartbeatResponseTestV1, NodeDoesNotStepDownSelfWhenRemoteNodeWasElectedL ASSERT_NO_ACTION(nextAction.getAction()); } -TEST_F(HeartbeatResponseTestV1, NodeWillNotTransitionToPrimaryAfterHearingAboutNewerTerm) { +TEST_F(HeartbeatResponseTestV1, NodeWillCompleteTransitionToPrimaryAfterHearingAboutNewerTerm) { auto initialTerm = getTopoCoord().getTerm(); OpTime firstOpTimeOfTerm(Timestamp(1, 1), initialTerm); @@ -6922,17 +6922,14 @@ TEST_F(HeartbeatResponseTestV1, NodeWillNotTransitionToPrimaryAfterHearingAboutN firstOpTimeOfTerm.getTimestamp()); getTopoCoord().setCurrentPrimary_forTest(getSelfIndex()); - // At first transition to primary is OK - ASSERT(getTopoCoord().canCompleteTransitionToPrimary(initialTerm)); + // Verify that transition to primary is OK. 
+ ASSERT_TRUE(getTopoCoord().canCompleteTransitionToPrimary(initialTerm)); // Now mark ourselves as mid-stepdown, as if we had heard about a new term. getTopoCoord().prepareForUnconditionalStepDown(); - ASSERT_FALSE(getTopoCoord().canCompleteTransitionToPrimary(initialTerm)); - - // Check that transitioning to primary fails now that the term has been updated. - ASSERT_EQUALS(ErrorCodes::PrimarySteppedDown, - getTopoCoord().completeTransitionToPrimary(firstOpTimeOfTerm)); + // Verify that the transition to primary can still complete. + ASSERT_TRUE(getTopoCoord().canCompleteTransitionToPrimary(initialTerm)); } TEST_F(HeartbeatResponseTestV1, |