diff options
author | Tess Avitabile <tess.avitabile@mongodb.com> | 2020-05-07 11:04:47 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-05-11 13:31:05 +0000 |
commit | c5f84d128c27f0f603afeffb6717ee823d57606d (patch) | |
tree | e740cf740069bd6333438fc288ff7abc8fe79284 | |
parent | 087d6aeaaa10f401b22fa8c39be427150804aa2c (diff) | |
download | mongo-c5f84d128c27f0f603afeffb6717ee823d57606d.tar.gz |
SERVER-47832 ReplicationCoordinatorImpl::_makeIsMasterResponse() should check for quiesce mode
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 21 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_test.cpp | 46 |
2 files changed, 62 insertions, 5 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 9d1fc8502c5..ac24bd2d1bf 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -119,6 +119,8 @@ MONGO_FAIL_POINT_DEFINE(forceSyncSourceRetryWaitForInitialSync); MONGO_FAIL_POINT_DEFINE(waitForIsMasterResponse); // Will cause an isMaster request to hang as it starts waiting. MONGO_FAIL_POINT_DEFINE(hangWhileWaitingForIsMasterResponse); +// Will cause an isMaster request to hang after it times out waiting for a topology change. +MONGO_FAIL_POINT_DEFINE(hangAfterWaitingForTopologyChangeTimesOut); MONGO_FAIL_POINT_DEFINE(skipDurableTimestampUpdates); // Skip sending heartbeats to pre-check that a quorum is available before a reconfig. MONGO_FAIL_POINT_DEFINE(omitConfigQuorumCheck); @@ -213,6 +215,9 @@ StatusOrStatusWith<T> futureGetNoThrowWithDeadline(OperationContext* opCtx, } } +const Status kQuiesceModeShutdownStatus = + Status(ErrorCodes::ShutdownInProgress, "The server is in quiesce mode and will shut down"); + } // namespace void ReplicationCoordinatorImpl::WaiterList::add_inlock(const OpTime& opTime, @@ -2129,6 +2134,9 @@ void ReplicationCoordinatorImpl::updateAndLogStateTransitionMetrics( std::shared_ptr<IsMasterResponse> ReplicationCoordinatorImpl::_makeIsMasterResponse( boost::optional<StringData> horizonString, WithLock lock, const bool hasValidConfig) const { + uassert( + kQuiesceModeShutdownStatus.code(), kQuiesceModeShutdownStatus.reason(), !_inQuiesceMode); + if (!hasValidConfig) { auto response = std::make_shared<IsMasterResponse>(); response->setTopologyVersion(_topCoord->getTopologyVersion()); @@ -2170,9 +2178,8 @@ ReplicationCoordinatorImpl::_getIsMasterResponseFuture( boost::optional<StringData> horizonString, boost::optional<TopologyVersion> clientTopologyVersion) { - uassert(ErrorCodes::ShutdownInProgress, - "The server is in quiesce mode and will shut down",
- !_inQuiesceMode); + uassert( + kQuiesceModeShutdownStatus.code(), kQuiesceModeShutdownStatus.reason(), !_inQuiesceMode); const bool hasValidConfig = horizonString != boost::none; @@ -2287,6 +2294,11 @@ std::shared_ptr<const IsMasterResponse> ReplicationCoordinatorImpl::awaitIsMaste futureGetNoThrowWithDeadline(opCtx, future, deadline.get(), opCtx->getTimeoutError()); auto status = statusWithIsMaster.getStatus(); + if (MONGO_unlikely(hangAfterWaitingForTopologyChangeTimesOut.shouldFail())) { + LOGV2(4783200, "Hanging due to hangAfterWaitingForTopologyChangeTimesOut failpoint"); + hangAfterWaitingForTopologyChangeTimesOut.pauseWhileSet(opCtx); + } + if (status == ErrorCodes::ExceededTimeLimit) { // Return an IsMasterResponse with the current topology version on timeout when waiting for // a topology change. @@ -3909,8 +3921,7 @@ void ReplicationCoordinatorImpl::_fulfillTopologyChangePromise(WithLock lock) { iter != _horizonToTopologyChangePromiseMap.end(); iter++) { if (_inQuiesceMode) { - iter->second->setError({ErrorCodes::ShutdownInProgress, - "The server is in quiesce mode and will shut down"}); + iter->second->setError(kQuiesceModeShutdownStatus); } else { StringData horizonString = iter->first; auto response = _makeIsMasterResponse(horizonString, lock, hasValidConfig); diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index be6f01d1a30..403dd3ab420 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -3318,6 +3318,52 @@ TEST_F(ReplCoordTest, IsMasterReturnsErrorOnEnteringQuiesceMode) { getIsMasterThread.join(); } +TEST_F(ReplCoordTest, IsMasterReturnsErrorOnEnteringQuiesceModeAfterWaitingTimesOut) { + init(); + assertStartSuccess(BSON("_id" + << "mySet" + << "version" << 1 << "members" + << BSON_ARRAY(BSON("host" + << "node1:12345" + << "_id" << 0))), + HostAndPort("node1", 12345));
+ ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); + + auto opCtx = makeOperationContext(); + auto currentTopologyVersion = getTopoCoord().getTopologyVersion(); + + auto maxAwaitTime = Milliseconds(5000); + auto deadline = getNet()->now() + maxAwaitTime; + + stdx::thread getIsMasterThread([&] { + ASSERT_THROWS_CODE(getReplCoord()->awaitIsMasterResponse( + opCtx.get(), {}, currentTopologyVersion, deadline), + AssertionException, + ErrorCodes::ShutdownInProgress); + }); + + auto failPoint = globalFailPointRegistry().find("hangAfterWaitingForTopologyChangeTimesOut"); + auto timesEnteredFailPoint = failPoint->setMode(FailPoint::alwaysOn); + ON_BLOCK_EXIT([&] { failPoint->setMode(FailPoint::off, 0); }); + + getNet()->enterNetwork(); + getNet()->advanceTime(deadline); + ASSERT_EQUALS(deadline, getNet()->now()); + getNet()->exitNetwork(); + + // Ensure that waiting for a topology change timed out before entering quiesce mode. + failPoint->waitForTimesEntered(timesEnteredFailPoint + 1); + ASSERT(getReplCoord()->enterQuiesceModeIfSecondary()); + failPoint->setMode(FailPoint::off, 0); + + // Advance the clock so that pauseWhileSet() will wake up. + getNet()->enterNetwork(); + getNet()->advanceTime(getNet()->now() + Milliseconds(100)); + getNet()->exitNetwork(); + + getIsMasterThread.join(); +} + TEST_F(ReplCoordTest, IsMasterReturnsErrorInQuiesceMode) { init(); assertStartSuccess(BSON("_id" |