From 3b4e9894e2971c2b42b83c336a48068810c91a9c Mon Sep 17 00:00:00 2001 From: Pavi Vetriselvan Date: Thu, 30 Apr 2020 10:51:05 -0400 Subject: SERVER-46957 Implement Quiesce Mode for mongos --- src/mongo/SConscript | 1 + src/mongo/s/mongos_topology_coordinator.cpp | 26 +++++++++ src/mongo/s/mongos_topology_coordinator.h | 14 +++-- src/mongo/s/mongos_topology_coordinator_test.cpp | 67 ++++++++++++++++-------- src/mongo/s/server.cpp | 8 +++ src/mongo/shell/shardingtest.js | 11 ++-- 6 files changed, 98 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/mongo/SConscript b/src/mongo/SConscript index 6d789b60156..0af7144915e 100644 --- a/src/mongo/SConscript +++ b/src/mongo/SConscript @@ -564,6 +564,7 @@ mongos = env.Program( 's/committed_optime_metadata_hook', 's/coreshard', 's/is_mongos', + 's/mongos_topology_coordinator', 's/query/cluster_cursor_cleanup_job', 's/sessions_collection_sharded', 's/sharding_egress_metadata_hook_for_mongos', diff --git a/src/mongo/s/mongos_topology_coordinator.cpp b/src/mongo/s/mongos_topology_coordinator.cpp index 9f88ad7c56e..4f889b65b83 100644 --- a/src/mongo/s/mongos_topology_coordinator.cpp +++ b/src/mongo/s/mongos_topology_coordinator.cpp @@ -53,10 +53,14 @@ MONGO_INITIALIZER(GenerateMongosInstanceId)(InitializerContext*) { return Status::OK(); } +// Signals that an isMaster request has started waiting. +MONGO_FAIL_POINT_DEFINE(waitForIsMasterResponse); // Awaitable isMaster requests with the proper topologyVersions are expected to wait for // maxAwaitTimeMS on mongos. When set, this failpoint will hang right before waiting on a // topology change. MONGO_FAIL_POINT_DEFINE(hangWhileWaitingForIsMasterResponse); +// Failpoint for hanging during quiesce mode on mongos. 
+MONGO_FAIL_POINT_DEFINE(hangDuringQuiesceMode); template StatusOrStatusWith futureGetNoThrowWithDeadline(OperationContext* opCtx, @@ -137,6 +141,12 @@ std::shared_ptr MongosTopologyCoordinator::awaitIs IsMasterMetrics::get(opCtx)->incrementNumAwaitingTopologyChanges(); lk.unlock(); + if (MONGO_unlikely(waitForIsMasterResponse.shouldFail())) { + // Used in tests that wait for this failpoint to be entered before shutting down mongos, + // which is the only action that triggers a topology change. + LOGV2(4695704, "waitForIsMasterResponse failpoint enabled"); + } + if (MONGO_unlikely(hangWhileWaitingForIsMasterResponse.shouldFail())) { LOGV2(4695501, "hangWhileWaitingForIsMasterResponse failpoint enabled"); hangWhileWaitingForIsMasterResponse.pauseWhileSet(opCtx); @@ -182,4 +192,20 @@ void MongosTopologyCoordinator::enterQuiesceMode() { IsMasterMetrics::get(getGlobalServiceContext())->resetNumAwaitingTopologyChanges(); } +void MongosTopologyCoordinator::enterQuiesceModeAndWait(OperationContext* opCtx) { + enterQuiesceMode(); + + if (MONGO_unlikely(hangDuringQuiesceMode.shouldFail())) { + LOGV2(4695700, "hangDuringQuiesceMode failpoint enabled"); + hangDuringQuiesceMode.pauseWhileSet(opCtx); + } + + // TODO SERVER-46958: Determine what the quiesce time should be by checking the + // shutdownTimeoutMillisForSignaledShutdown mongos server parameter. 
+ auto timeout = Milliseconds(100); + LOGV2(4695701, "Entering quiesce mode for mongos shutdown", "quiesceTime"_attr = timeout); + opCtx->sleepFor(timeout); + LOGV2(4695702, "Exiting quiesce mode for mongos shutdown"); +} + } // namespace mongo diff --git a/src/mongo/s/mongos_topology_coordinator.h b/src/mongo/s/mongos_topology_coordinator.h index 903a17aa2ca..708de6a1241 100644 --- a/src/mongo/s/mongos_topology_coordinator.h +++ b/src/mongo/s/mongos_topology_coordinator.h @@ -64,13 +64,19 @@ public: /** * We only enter quiesce mode during the shutdown process, which means the - * MongosTopologyCoordinator will never need to exit quiesce mode. While in quiesce mode, we - * allow operations to continue and accept new operations, but we fail isMaster requests with - * ShutdownInProgress. This function causes us to increment the topologyVersion and start - * failing isMaster requests with ShutdownInProgress. + * MongosTopologyCoordinator will never need to exit quiesce mode. This function causes us to + * increment the topologyVersion and start failing isMaster requests with ShutdownInProgress. */ void enterQuiesceMode(); + /** + * While in quiesce mode, we will sleep for 100ms. This allows short running operations to + * continue. We will also accept new operations, but we fail isMaster requests with + * ShutdownInProgress. + * TODO SERVER-46958: Modify comment with correct timeout value. 
+ */ + void enterQuiesceModeAndWait(OperationContext* opCtx); + TopologyVersion getTopologyVersion() const { stdx::lock_guard lk(_mutex); return _topologyVersion; diff --git a/src/mongo/s/mongos_topology_coordinator_test.cpp b/src/mongo/s/mongos_topology_coordinator_test.cpp index f63d43e06ac..714943428e2 100644 --- a/src/mongo/s/mongos_topology_coordinator_test.cpp +++ b/src/mongo/s/mongos_topology_coordinator_test.cpp @@ -48,7 +48,18 @@ class MongosTopoCoordTest : public ServiceContextTest { public: virtual void setUp() { _topo = std::make_unique(); + + getServiceContext()->setFastClockSource(std::make_unique()); + _fastClock = dynamic_cast(getServiceContext()->getFastClockSource()); + getServiceContext()->setPreciseClockSource(std::make_unique()); + _preciseClock = + dynamic_cast(getServiceContext()->getPreciseClockSource()); + } + + virtual void tearDown() { + _fastClock = nullptr; + _preciseClock = nullptr; } protected: @@ -60,14 +71,27 @@ protected: } /** - * Gets the clock used by MongosTopologyCoordinator. + * Advance the time by millis on both clock source mocks. + */ + void advanceTime(Milliseconds millis) { + _fastClock->advance(millis); + _preciseClock->advance(millis); + } + + /** + * Assumes that the times on both clock source mocks are the same. */ - ClockSourceMock* getClock() { - return dynamic_cast(getServiceContext()->getPreciseClockSource()); + Date_t now() { + invariant(_fastClock->now() == _preciseClock->now()); + return _fastClock->now(); } private: unique_ptr _topo; + // The fast clock is used by OperationContext::hasDeadlineExpired. + ClockSourceMock* _fastClock; + // The precise clock is used by waitForConditionOrInterruptNoAssertUntil. 
+ ClockSourceMock* _preciseClock; }; TEST_F(MongosTopoCoordTest, MongosTopologyVersionCounterInitializedAtStartup) { @@ -96,8 +120,8 @@ TEST_F(MongosTopoCoordTest, AwaitIsMasterResponseReturnsCurrentMongosTopologyVer auto opCtx = makeOperationContext(); auto maxAwaitTime = Milliseconds(5000); auto halfwayToMaxAwaitTime = maxAwaitTime / 2; - auto halfwayToDeadline = getClock()->now() + halfwayToMaxAwaitTime; - auto deadline = getClock()->now() + maxAwaitTime; + auto halfwayToDeadline = now() + halfwayToMaxAwaitTime; + auto deadline = now() + maxAwaitTime; // isMaster request with the current TopologyVersion should attempt to wait for maxAwaitTimeMS. auto currentTopologyVersion = getTopoCoord().getTopologyVersion(); @@ -113,14 +137,14 @@ TEST_F(MongosTopoCoordTest, AwaitIsMasterResponseReturnsCurrentMongosTopologyVer ASSERT_EQUALS(topologyVersion.getProcessId(), currentTopologyVersion.getProcessId()); }); - // Advance the clock halfway and make sure awaitIsMasterResponse did not return yet. - getClock()->advance(halfwayToMaxAwaitTime); - ASSERT_EQUALS(halfwayToDeadline, getClock()->now()); + // Advance the clocks halfway and make sure awaitIsMasterResponse did not return yet. + advanceTime(halfwayToMaxAwaitTime); + ASSERT_EQUALS(halfwayToDeadline, now()); ASSERT_FALSE(isMasterReturned); - // Advance the clock the rest of the way so that awaitIsMasterResponse times out. - getClock()->advance(halfwayToMaxAwaitTime); - ASSERT_EQUALS(deadline, getClock()->now()); + // Advance the clocks the rest of the way so that awaitIsMasterResponse times out. 
+ advanceTime(halfwayToMaxAwaitTime); + ASSERT_EQUALS(deadline, now()); getIsMasterThread.join(); ASSERT_TRUE(isMasterReturned); } @@ -128,7 +152,7 @@ TEST_F(MongosTopoCoordTest, AwaitIsMasterResponseReturnsCurrentMongosTopologyVer TEST_F(MongosTopoCoordTest, AwaitIsMasterErrorsWithHigherCounterAndSameProcessID) { auto opCtx = makeOperationContext(); auto maxAwaitTime = Milliseconds(5000); - auto deadline = getClock()->now() + maxAwaitTime; + auto deadline = now() + maxAwaitTime; auto currentTopologyVersion = getTopoCoord().getTopologyVersion(); @@ -147,7 +171,7 @@ TEST_F(MongosTopoCoordTest, AwaitIsMasterErrorsWithHigherCounterAndSameProcessID TEST_F(MongosTopoCoordTest, AwaitIsMasterReturnsImmediatelyWithHigherCounterAndDifferentProcessID) { auto opCtx = makeOperationContext(); auto maxAwaitTime = Milliseconds(5000); - auto deadline = getClock()->now() + maxAwaitTime; + auto deadline = now() + maxAwaitTime; auto currentTopologyVersion = getTopoCoord().getTopologyVersion(); @@ -168,7 +192,7 @@ TEST_F(MongosTopoCoordTest, AwaitIsMasterReturnsImmediatelyWithCurrentCounterAndDifferentProcessID) { auto opCtx = makeOperationContext(); auto maxAwaitTime = Milliseconds(5000); - auto deadline = getClock()->now() + maxAwaitTime; + auto deadline = now() + maxAwaitTime; auto currentTopologyVersion = getTopoCoord().getTopologyVersion(); @@ -200,14 +224,15 @@ TEST_F(MongosTopoCoordTest, AwaitIsMasterReturnsImmediatelyWithNoTopologyVersion TEST_F(MongosTopoCoordTest, IsMasterReturnsErrorInQuiesceMode) { auto currentTopologyVersion = getTopoCoord().getTopologyVersion(); + auto opCtx = makeOperationContext(); + auto maxAwaitTime = Milliseconds(5000); + auto deadline = now() + maxAwaitTime; + getTopoCoord().enterQuiesceMode(); + ASSERT_EQUALS(currentTopologyVersion.getCounter() + 1, getTopoCoord().getTopologyVersion().getCounter()); - auto opCtx = makeOperationContext(); - auto maxAwaitTime = Milliseconds(5000); - auto deadline = getClock()->now() + maxAwaitTime; - // The 
following isMaster requests should fail immediately with ShutdownInProgress errors // instead of following the usual error precedence. @@ -244,6 +269,7 @@ TEST_F(MongosTopoCoordTest, IsMasterReturnsErrorOnEnteringQuiesceMode) { auto opCtx = makeOperationContext(); auto currentTopologyVersion = getTopoCoord().getTopologyVersion(); auto maxAwaitTime = Milliseconds(5000); + auto deadline = now() + maxAwaitTime; // This will cause the isMaster request to hang. auto waitForIsMasterFailPoint = @@ -251,9 +277,6 @@ TEST_F(MongosTopoCoordTest, IsMasterReturnsErrorOnEnteringQuiesceMode) { auto timesEnteredFailPoint = waitForIsMasterFailPoint->setMode(FailPoint::alwaysOn); ON_BLOCK_EXIT([&] { waitForIsMasterFailPoint->setMode(FailPoint::off, 0); }); stdx::thread getIsMasterThread([&] { - auto maxAwaitTime = Milliseconds(5000); - auto deadline = getClock()->now() + maxAwaitTime; - ASSERT_THROWS_CODE( getTopoCoord().awaitIsMasterResponse(opCtx.get(), currentTopologyVersion, deadline), AssertionException, @@ -266,7 +289,7 @@ TEST_F(MongosTopoCoordTest, IsMasterReturnsErrorOnEnteringQuiesceMode) { ASSERT_EQUALS(currentTopologyVersion.getCounter() + 1, getTopoCoord().getTopologyVersion().getCounter()); waitForIsMasterFailPoint->setMode(FailPoint::off); - getClock()->advance(maxAwaitTime); + advanceTime(maxAwaitTime); getIsMasterThread.join(); } diff --git a/src/mongo/s/server.cpp b/src/mongo/s/server.cpp index d71d596cb82..5f0589aded1 100644 --- a/src/mongo/s/server.cpp +++ b/src/mongo/s/server.cpp @@ -85,6 +85,7 @@ #include "mongo/s/grid.h" #include "mongo/s/is_mongos.h" #include "mongo/s/mongos_options.h" +#include "mongo/s/mongos_topology_coordinator.h" #include "mongo/s/query/cluster_cursor_cleanup_job.h" #include "mongo/s/query/cluster_cursor_manager.h" #include "mongo/s/service_entry_point_mongos.h" @@ -268,6 +269,13 @@ void cleanupTask(ServiceContext* serviceContext) { opCtx = uniqueTxn.get(); } + // Enter quiesce mode so that existing and new short operations are allowed 
to finish. + // At this point, we will start responding to any isMaster request with ShutdownInProgress + // so that clients can re-route their operations. + if (auto mongosTopCoord = MongosTopologyCoordinator::get(opCtx)) { + mongosTopCoord->enterQuiesceModeAndWait(opCtx); + } + // Shutdown the TransportLayer so that new connections aren't accepted if (auto tl = serviceContext->getTransportLayer()) { LOGV2_OPTIONS( diff --git a/src/mongo/shell/shardingtest.js b/src/mongo/shell/shardingtest.js index e222bacc090..af86706a6aa 100644 --- a/src/mongo/shell/shardingtest.js +++ b/src/mongo/shell/shardingtest.js @@ -711,13 +711,18 @@ var ShardingTest = function(params) { /** * Kills the mongos with index n. + * + * @param {boolean} [extraOptions.waitpid=true] if true, we will wait for the process to + * terminate after stopping it. */ - this.stopMongos = function(n, opts) { + this.stopMongos = function(n, opts, { + waitpid: waitpid = true, + } = {}) { if (otherParams.useBridge) { - MongoRunner.stopMongos(unbridgedMongos[n], undefined, opts); + MongoRunner.stopMongos(unbridgedMongos[n], undefined, opts, waitpid); this["s" + n].stop(); } else { - MongoRunner.stopMongos(this["s" + n], undefined, opts); + MongoRunner.stopMongos(this["s" + n], undefined, opts, waitpid); } }; -- cgit v1.2.1