summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tess Avitabile <tess.avitabile@mongodb.com> 2020-05-07 11:04:47 -0400
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-05-11 13:31:05 +0000
commit c5f84d128c27f0f603afeffb6717ee823d57606d (patch)
tree e740cf740069bd6333438fc288ff7abc8fe79284
parent 087d6aeaaa10f401b22fa8c39be427150804aa2c (diff)
download mongo-c5f84d128c27f0f603afeffb6717ee823d57606d.tar.gz
SERVER-47832 ReplicationCoordinatorImpl::_makeIsMasterResponse() should check for quiesce mode
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 21
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_test.cpp 46
2 files changed, 62 insertions, 5 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 9d1fc8502c5..ac24bd2d1bf 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -119,6 +119,8 @@ MONGO_FAIL_POINT_DEFINE(forceSyncSourceRetryWaitForInitialSync);
MONGO_FAIL_POINT_DEFINE(waitForIsMasterResponse);
// Will cause an isMaster request to hang as it starts waiting.
MONGO_FAIL_POINT_DEFINE(hangWhileWaitingForIsMasterResponse);
+// Will cause an isMaster request to hang after it times out waiting for a topology change.
+MONGO_FAIL_POINT_DEFINE(hangAfterWaitingForTopologyChangeTimesOut);
MONGO_FAIL_POINT_DEFINE(skipDurableTimestampUpdates);
// Skip sending heartbeats to pre-check that a quorum is available before a reconfig.
MONGO_FAIL_POINT_DEFINE(omitConfigQuorumCheck);
@@ -213,6 +215,9 @@ StatusOrStatusWith<T> futureGetNoThrowWithDeadline(OperationContext* opCtx,
}
}
+const Status kQuiesceModeShutdownStatus =
+ Status(ErrorCodes::ShutdownInProgress, "The server is in quiesce mode and will shut down");
+
} // namespace
void ReplicationCoordinatorImpl::WaiterList::add_inlock(const OpTime& opTime,
@@ -2129,6 +2134,9 @@ void ReplicationCoordinatorImpl::updateAndLogStateTransitionMetrics(
std::shared_ptr<IsMasterResponse> ReplicationCoordinatorImpl::_makeIsMasterResponse(
boost::optional<StringData> horizonString, WithLock lock, const bool hasValidConfig) const {
+ uassert(
+ kQuiesceModeShutdownStatus.code(), kQuiesceModeShutdownStatus.reason(), !_inQuiesceMode);
+
if (!hasValidConfig) {
auto response = std::make_shared<IsMasterResponse>();
response->setTopologyVersion(_topCoord->getTopologyVersion());
@@ -2170,9 +2178,8 @@ ReplicationCoordinatorImpl::_getIsMasterResponseFuture(
boost::optional<StringData> horizonString,
boost::optional<TopologyVersion> clientTopologyVersion) {
- uassert(ErrorCodes::ShutdownInProgress,
- "The server is in quiesce mode and will shut down",
- !_inQuiesceMode);
+ uassert(
+ kQuiesceModeShutdownStatus.code(), kQuiesceModeShutdownStatus.reason(), !_inQuiesceMode);
const bool hasValidConfig = horizonString != boost::none;
@@ -2287,6 +2294,11 @@ std::shared_ptr<const IsMasterResponse> ReplicationCoordinatorImpl::awaitIsMaste
futureGetNoThrowWithDeadline(opCtx, future, deadline.get(), opCtx->getTimeoutError());
auto status = statusWithIsMaster.getStatus();
+ if (MONGO_unlikely(hangAfterWaitingForTopologyChangeTimesOut.shouldFail())) {
+ LOGV2(4783200, "Hanging due to hangAfterWaitingForTopologyChangeTimesOut failpoint");
+ hangAfterWaitingForTopologyChangeTimesOut.pauseWhileSet(opCtx);
+ }
+
if (status == ErrorCodes::ExceededTimeLimit) {
// Return an IsMasterResponse with the current topology version on timeout when waiting for
// a topology change.
@@ -3909,8 +3921,7 @@ void ReplicationCoordinatorImpl::_fulfillTopologyChangePromise(WithLock lock) {
iter != _horizonToTopologyChangePromiseMap.end();
iter++) {
if (_inQuiesceMode) {
- iter->second->setError({ErrorCodes::ShutdownInProgress,
- "The server is in quiesce mode and will shut down"});
+ iter->second->setError(kQuiesceModeShutdownStatus);
} else {
StringData horizonString = iter->first;
auto response = _makeIsMasterResponse(horizonString, lock, hasValidConfig);
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index be6f01d1a30..403dd3ab420 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -3318,6 +3318,52 @@ TEST_F(ReplCoordTest, IsMasterReturnsErrorOnEnteringQuiesceMode) {
getIsMasterThread.join();
}
+TEST_F(ReplCoordTest, IsMasterReturnsErrorOnEnteringQuiesceModeAfterWaitingTimesOut) {
+ init();
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0))),
+ HostAndPort("node1", 12345));
+ ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ auto opCtx = makeOperationContext();
+ auto currentTopologyVersion = getTopoCoord().getTopologyVersion();
+
+ auto maxAwaitTime = Milliseconds(5000);
+ auto deadline = getNet()->now() + maxAwaitTime;
+
+ stdx::thread getIsMasterThread([&] {
+ ASSERT_THROWS_CODE(getReplCoord()->awaitIsMasterResponse(
+ opCtx.get(), {}, currentTopologyVersion, deadline),
+ AssertionException,
+ ErrorCodes::ShutdownInProgress);
+ });
+
+ auto failPoint = globalFailPointRegistry().find("hangAfterWaitingForTopologyChangeTimesOut");
+ auto timesEnteredFailPoint = failPoint->setMode(FailPoint::alwaysOn);
+ ON_BLOCK_EXIT([&] { failPoint->setMode(FailPoint::off, 0); });
+
+ getNet()->enterNetwork();
+ getNet()->advanceTime(deadline);
+ ASSERT_EQUALS(deadline, getNet()->now());
+ getNet()->exitNetwork();
+
+ // Ensure that waiting for a topology change timed out before entering quiesce mode.
+ failPoint->waitForTimesEntered(timesEnteredFailPoint + 1);
+ ASSERT(getReplCoord()->enterQuiesceModeIfSecondary());
+ failPoint->setMode(FailPoint::off, 0);
+
+ // Advance the clock so that pauseWhileSet() will wake up.
+ getNet()->enterNetwork();
+ getNet()->advanceTime(getNet()->now() + Milliseconds(100));
+ getNet()->exitNetwork();
+
+ getIsMasterThread.join();
+}
+
TEST_F(ReplCoordTest, IsMasterReturnsErrorInQuiesceMode) {
init();
assertStartSuccess(BSON("_id"