diff options
26 files changed, 399 insertions, 53 deletions
diff --git a/jstests/replsets/new_sync_source_in_quiesce_mode.js b/jstests/replsets/new_sync_source_in_quiesce_mode.js
new file mode 100644
index 00000000000..a94f5abfed7
--- /dev/null
+++ b/jstests/replsets/new_sync_source_in_quiesce_mode.js
@@ -0,0 +1,62 @@
+/*
+ * Test that fetching oplog from a new sync source that is in quiesce mode fails to establish a
+ * connection, causing the server to reenter sync source selection.
+ *
+ * @tags: [requires_fcv_46]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/write_concern_util.js");
+
+const rst = new ReplSetTest({
+    name: "new_sync_source_in_quiesce_mode",
+    nodes: 3,
+    nodeOptions: {setParameter: "shutdownTimeoutMillisForSignaledShutdown=5000"}
+});
+rst.startSet();
+const syncSource = rst.nodes[1];
+const syncingNode = rst.nodes[2];
+
+// Make sure the syncSource syncs only from the new primary. This is so that we prevent
+// syncingNode from blacklisting syncSource because it isn't syncing from anyone.
+assert.commandWorked(syncSource.adminCommand({
+    configureFailPoint: "forceSyncSourceCandidate",
+    mode: "alwaysOn",
+    data: {hostAndPort: rst.nodes[0].host}
+}));
+rst.initiateWithHighElectionTimeout();
+
+const primary = rst.getPrimary();
+
+// Stop replication on the syncingNode so that the primary and syncSource will both
+// definitely be ahead of it.
+stopServerReplication(syncingNode);
+
+jsTestLog("Ensure syncSource is ahead of syncingNode.");
+// Write on the primary with w:2; with syncingNode stopped, only syncSource can acknowledge it.
+assert.commandWorked(primary.getDB("test").c.insert({a: 1}, {writeConcern: {w: 2}}));
+
+jsTestLog("Transition syncSource to quiesce mode.");
+const quiesceModeFailPoint = configureFailPoint(syncSource, "hangDuringQuiesceMode");
+rst.stop(syncSource, null /*signal*/, null /*opts*/, {forRestart: true, waitpid: false});
+quiesceModeFailPoint.wait();
+
+jsTestLog("Ensure syncingNode tries to sync from syncSource.");
+// Use the replSetSyncFrom command to try and connect to the syncSource in quiesce mode.
+assert.commandWorked(syncingNode.adminCommand({replSetSyncFrom: syncSource.name}));
+restartServerReplication(syncingNode);
+// We will have blacklisted syncSource since it is shutting down, so we should re-enter
+// sync source selection and eventually choose the primary as our sync source.
+rst.awaitSyncSource(syncingNode, primary);
+
+jsTestLog("Restart syncSource.");
+quiesceModeFailPoint.off();
+rst.restart(syncSource);
+rst.awaitSecondaryNodes();
+
+jsTestLog("Finish test.");
+rst.stopSet();
+})();
\ No newline at end of file diff --git a/jstests/replsets/quiesce_mode.js b/jstests/replsets/quiesce_mode.js index 6dd1827e4c0..afdf1cefc83 100644 --- a/jstests/replsets/quiesce_mode.js +++ b/jstests/replsets/quiesce_mode.js @@ -31,6 +31,10 @@ function checkTopologyVersion(res, topologyVersionField) { assert.eq(res.topologyVersion.counter, topologyVersionField.counter + 1); } +function checkRemainingQuiesceTime(res) { + assert(res.hasOwnProperty("remainingQuiesceTimeMillis"), res); +} + function runAwaitableIsMaster(topologyVersionField) { let res = assert.commandFailedWithCode(db.runCommand({ isMaster: 1, @@ -39,6 +43,7 @@ function runAwaitableIsMaster(topologyVersionField) { }), ErrorCodes.ShutdownInProgress); assert(res.hasOwnProperty("topologyVersion"), res); + assert(res.hasOwnProperty("remainingQuiesceTimeMillis"), res); assert.eq(res.topologyVersion.counter, topologyVersionField.counter + 1); } @@ -83,16 +88,20 @@ assert.commandFailedWithCode(secondaryDB.adminCommand({serverStatus: 1}), ErrorCodes.ShutdownInProgress); jsTestLog("New isMaster commands return a ShutdownInProgress error."); -checkTopologyVersion(assert.commandFailedWithCode(secondary.adminCommand({isMaster: 1}), - ErrorCodes.ShutdownInProgress), - topologyVersionField); -checkTopologyVersion(assert.commandFailedWithCode(secondary.adminCommand({ +res = assert.commandFailedWithCode(secondary.adminCommand({isMaster: 1}), + ErrorCodes.ShutdownInProgress); +checkTopologyVersion(res, topologyVersionField); +checkRemainingQuiesceTime(res); + +res = assert.commandFailedWithCode(secondary.adminCommand({ isMaster: 1, topologyVersion: topologyVersionField, maxAwaitTimeMS: 99999999, }), - ErrorCodes.ShutdownInProgress), - topologyVersionField); + ErrorCodes.ShutdownInProgress); + +checkTopologyVersion(res, topologyVersionField); +checkRemainingQuiesceTime(res); // Test operation behavior during quiesce mode. 
jsTestLog("The running operation is allowed to finish."); @@ -166,16 +175,20 @@ assert.commandFailedWithCode(primaryDB.adminCommand({serverStatus: 1}), ErrorCodes.ShutdownInProgress); jsTestLog("New isMaster commands return a ShutdownInProgress error."); -checkTopologyVersion(assert.commandFailedWithCode(primary.adminCommand({isMaster: 1}), - ErrorCodes.ShutdownInProgress), - topologyVersionField); -checkTopologyVersion(assert.commandFailedWithCode(primary.adminCommand({ +res = assert.commandFailedWithCode(primary.adminCommand({isMaster: 1}), + ErrorCodes.ShutdownInProgress); +checkTopologyVersion(res, topologyVersionField); +checkRemainingQuiesceTime(res); + +res = assert.commandFailedWithCode(primary.adminCommand({ isMaster: 1, topologyVersion: topologyVersionField, maxAwaitTimeMS: 99999999, }), - ErrorCodes.ShutdownInProgress), - topologyVersionField); + ErrorCodes.ShutdownInProgress); + +checkTopologyVersion(res, topologyVersionField); +checkRemainingQuiesceTime(res); // Test operation behavior during quiesce mode. 
jsTestLog("The running operation is allowed to finish."); diff --git a/jstests/sharding/mongos_quiesce_mode.js b/jstests/sharding/mongos_quiesce_mode.js index f87e21f47a5..ec86028bc96 100644 --- a/jstests/sharding/mongos_quiesce_mode.js +++ b/jstests/sharding/mongos_quiesce_mode.js @@ -26,6 +26,10 @@ function checkTopologyVersion(res, topologyVersionField) { assert.eq(res.topologyVersion.counter, topologyVersionField.counter + 1); } +function checkRemainingQuiesceTime(res) { + assert(res.hasOwnProperty("remainingQuiesceTimeMillis"), res); +} + function runAwaitableIsMaster(topologyVersionField) { let res = assert.commandFailedWithCode(db.runCommand({ isMaster: 1, @@ -83,9 +87,11 @@ jsTestLog("The waiting isMaster returns a ShutdownInProgress error."); isMaster(); jsTestLog("New isMaster command returns a ShutdownInProgress error."); -checkTopologyVersion( - assert.commandFailedWithCode(mongos.adminCommand({isMaster: 1}), ErrorCodes.ShutdownInProgress), - topologyVersionField); +res = + assert.commandFailedWithCode(mongos.adminCommand({isMaster: 1}), ErrorCodes.ShutdownInProgress); + +checkTopologyVersion(res, topologyVersionField); +checkRemainingQuiesceTime(res); // Test operation behavior during quiesce mode. 
jsTestLog("The running read operation is allowed to finish."); diff --git a/src/mongo/base/error_codes.yml b/src/mongo/base/error_codes.yml index 1dc2a7035d7..a099470be4d 100644 --- a/src/mongo/base/error_codes.yml +++ b/src/mongo/base/error_codes.yml @@ -117,7 +117,10 @@ error_codes: - {code: 88,name: SplitFailed_OBSOLETE} - {code: 89,name: NetworkTimeout,categories: [NetworkError,RetriableError,NetworkTimeoutError]} - {code: 90,name: CallbackCanceled,categories: [CancelationError]} - - {code: 91,name: ShutdownInProgress,categories: [ShutdownError,CancelationError,RetriableError]} + - {code: 91,name: ShutdownInProgress, + extra: ShutdownInProgressQuiesceInfo, + categories: [ShutdownError,CancelationError,RetriableError], + extraIsOptional: True} - {code: 92,name: SecondaryAheadOfPrimary} - {code: 93,name: InvalidReplicaSetConfig} - {code: 94,name: NotYetInitialized} diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index fe879a277e9..81397c5c5d3 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -60,10 +60,21 @@ env.Library( '$BUILD_DIR/mongo/base', '$BUILD_DIR/mongo/db/index_names', '$BUILD_DIR/mongo/db/write_concern_options', + 'shutdown_in_progress_quiesce_info', ] ) env.Library( + target='shutdown_in_progress_quiesce_info', + source= [ + 'shutdown_in_progress_quiesce_info.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + ], +) + +env.Library( target='initialize_snmp', source= [ 'initialize_snmp.cpp', diff --git a/src/mongo/db/mongod_main.cpp b/src/mongo/db/mongod_main.cpp index 959f1e6e86e..8511c4b1a89 100644 --- a/src/mongo/db/mongod_main.cpp +++ b/src/mongo/db/mongod_main.cpp @@ -1083,7 +1083,7 @@ void shutdownTask(const ShutdownTaskArgs& shutdownArgs) { } if (auto replCoord = repl::ReplicationCoordinator::get(serviceContext); - replCoord && replCoord->enterQuiesceModeIfSecondary()) { + replCoord && replCoord->enterQuiesceModeIfSecondary(shutdownTimeout)) { ServiceContext::UniqueOperationContext uniqueOpCtx; 
OperationContext* opCtx = client->getOperationContext(); if (!opCtx) { diff --git a/src/mongo/db/operation_context.cpp b/src/mongo/db/operation_context.cpp index d468560a8e9..7d81ee4ffb6 100644 --- a/src/mongo/db/operation_context.cpp +++ b/src/mongo/db/operation_context.cpp @@ -95,7 +95,9 @@ void OperationContext::setDeadlineAndMaxTime(Date_t when, ErrorCodes::Error timeoutError) { invariant(!getClient()->isInDirectClient() || _hasArtificialDeadline); invariant(ErrorCodes::isExceededTimeLimitError(timeoutError)); - invariant(!ErrorExtraInfo::parserFor(timeoutError)); + if (ErrorCodes::mustHaveExtraInfo(timeoutError)) { + invariant(!ErrorExtraInfo::parserFor(timeoutError)); + } uassert(40120, "Illegal attempt to change operation deadline", _hasArtificialDeadline || !hasDeadline()); @@ -347,7 +349,9 @@ StatusWith<stdx::cv_status> OperationContext::waitForConditionOrInterruptNoAsser void OperationContext::markKilled(ErrorCodes::Error killCode) { invariant(killCode != ErrorCodes::OK); - invariant(!ErrorExtraInfo::parserFor(killCode)); + if (ErrorCodes::mustHaveExtraInfo(killCode)) { + invariant(!ErrorExtraInfo::parserFor(killCode)); + } if (killCode == ErrorCodes::ClientDisconnect) { LOGV2(20883, "Interrupted operation as its client disconnected", "opId"_attr = getOpID()); diff --git a/src/mongo/db/operation_context_test.cpp b/src/mongo/db/operation_context_test.cpp index 15ae8c19f28..33beb6cf112 100644 --- a/src/mongo/db/operation_context_test.cpp +++ b/src/mongo/db/operation_context_test.cpp @@ -142,6 +142,14 @@ DEATH_TEST(OperationContextTest, CallingSetDeadlineWithExtraInfoCrashes, "invari opCtx->setDeadlineByDate(Date_t::now(), ErrorCodes::ForTestingErrorExtraInfo); } +TEST(OperationContextTest, CallingMarkKillWithOptionalExtraInfoSucceeds) { + auto serviceCtx = ServiceContext::make(); + auto client = serviceCtx->makeClient("OperationContextTest"); + auto opCtx = client->makeOperationContext(); + + opCtx->markKilled(ErrorCodes::ForTestingOptionalErrorExtraInfo); 
+} + TEST(OperationContextTest, OpCtxGroup) { OperationContextGroup group1; ASSERT_TRUE(group1.isEmpty()); diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp index a9d11d23ee8..4a6f2928365 100644 --- a/src/mongo/db/repl/bgsync.cpp +++ b/src/mongo/db/repl/bgsync.cpp @@ -57,6 +57,7 @@ #include "mongo/db/repl/rs_rollback.h" #include "mongo/db/repl/storage_interface.h" #include "mongo/db/s/shard_identity_rollback_notifier.h" +#include "mongo/db/shutdown_in_progress_quiesce_info.h" #include "mongo/logv2/log.h" #include "mongo/rpc/get_status_from_command_result.h" #include "mongo/rpc/metadata/repl_set_metadata.h" @@ -560,7 +561,7 @@ void BackgroundSync::_produce() { return; } - Seconds blacklistDuration(60); + Milliseconds blacklistDuration(60000); if (fetcherReturnStatus.code() == ErrorCodes::OplogOutOfOrder) { // This is bad because it means that our source // has not returned oplog entries in ascending ts order, and they need to be. @@ -599,6 +600,17 @@ void BackgroundSync::_produce() { "syncSource"_attr = source, "blacklistDuration"_attr = blacklistDuration); _replCoord->blacklistSyncSource(source, Date_t::now() + blacklistDuration); + } else if (fetcherReturnStatus.code() == ErrorCodes::ShutdownInProgress) { + if (auto quiesceInfo = fetcherReturnStatus.extraInfo<ShutdownInProgressQuiesceInfo>()) { + blacklistDuration = Milliseconds(quiesceInfo->getRemainingQuiesceTimeMillis()); + LOGV2_WARNING( + 4696201, + "Sync source was in quiesce mode while we were querying its oplog. 
Blacklisting " + "sync source", + "syncSource"_attr = source, + "blacklistDuration"_attr = blacklistDuration); + _replCoord->blacklistSyncSource(source, Date_t::now() + blacklistDuration); + } } else if (!fetcherReturnStatus.isOK()) { LOGV2_WARNING(21122, "Oplog fetcher stopped querying remote oplog with error: {error}", diff --git a/src/mongo/db/repl/oplog_fetcher.cpp b/src/mongo/db/repl/oplog_fetcher.cpp index 37cc0d9d4cd..7c664838887 100644 --- a/src/mongo/db/repl/oplog_fetcher.cpp +++ b/src/mongo/db/repl/oplog_fetcher.cpp @@ -897,6 +897,14 @@ Status OplogFetcher::_checkTooStaleToSyncFromSource(const OpTime lastFetched, bool OplogFetcher::OplogFetcherRestartDecisionDefault::shouldContinue(OplogFetcher* fetcher, Status status) { + // If we try to sync from a node that is shutting down, do not attempt to reconnect. + // We should choose a new sync source. + if (status.code() == ErrorCodes::ShutdownInProgress) { + LOGV2(4696202, + "Not recreating cursor for oplog fetcher because sync source is shutting down", + "error"_attr = redact(status)); + return false; + } if (_numRestarts == _maxRestarts) { LOGV2(21274, "Error returned from oplog query (no more query restarts left): {error}", diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h index a2f3d681853..a167963f00e 100644 --- a/src/mongo/db/repl/replication_coordinator.h +++ b/src/mongo/db/repl/replication_coordinator.h @@ -132,8 +132,11 @@ public: * with ShutdownInProgress. This function causes us to increment the topologyVersion and start * failing isMaster requests with ShutdownInProgress. Returns true if the server entered quiesce * mode. + * + * We take in quiesceTime only for reporting purposes. The waiting during quiesce mode happens + * external to the ReplicationCoordinator. */ - virtual bool enterQuiesceModeIfSecondary() = 0; + virtual bool enterQuiesceModeIfSecondary(Milliseconds quiesceTime) = 0; /** * Returns whether the server is in quiesce mode. 
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 5f548f0d97b..01b9b230968 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -86,6 +86,7 @@ #include "mongo/db/repl/update_position_args.h" #include "mongo/db/repl/vote_requester.h" #include "mongo/db/server_options.h" +#include "mongo/db/shutdown_in_progress_quiesce_info.h" #include "mongo/db/storage/storage_options.h" #include "mongo/db/vector_clock.h" #include "mongo/db/vector_clock_mutable.h" @@ -221,8 +222,8 @@ StatusOrStatusWith<T> futureGetNoThrowWithDeadline(OperationContext* opCtx, } } -const Status kQuiesceModeShutdownStatus = - Status(ErrorCodes::ShutdownInProgress, "The server is in quiesce mode and will shut down"); +constexpr StringData kQuiesceModeShutdownMessage = + "The server is in quiesce mode and will shut down"_sd; } // namespace @@ -928,7 +929,7 @@ void ReplicationCoordinatorImpl::enterTerminalShutdown() { _inTerminalShutdown = true; } -bool ReplicationCoordinatorImpl::enterQuiesceModeIfSecondary() { +bool ReplicationCoordinatorImpl::enterQuiesceModeIfSecondary(Milliseconds quiesceTime) { LOGV2_INFO(4794602, "Attempting to enter quiesce mode"); stdx::lock_guard lk(_mutex); @@ -938,6 +939,7 @@ bool ReplicationCoordinatorImpl::enterQuiesceModeIfSecondary() { } _inQuiesceMode = true; + _quiesceDeadline = _replExecutor->now() + quiesceTime; // Increment the topology version and respond to all waiting isMaster requests with an error. 
_fulfillTopologyChangePromise(lk); @@ -2149,10 +2151,20 @@ void ReplicationCoordinatorImpl::updateAndLogStateTransitionMetrics( "metrics"_attr = bob.obj()); } +long long ReplicationCoordinatorImpl::_calculateRemainingQuiesceTimeMillis() const { + auto remainingQuiesceTimeMillis = + std::max(Milliseconds::zero(), _quiesceDeadline - _replExecutor->now()); + // Turn remainingQuiesceTimeMillis into an int64 so that it's a supported BSONElement. + long long remainingQuiesceTimeLong = durationCount<Milliseconds>(remainingQuiesceTimeMillis); + return remainingQuiesceTimeLong; +} + std::shared_ptr<IsMasterResponse> ReplicationCoordinatorImpl::_makeIsMasterResponse( boost::optional<StringData> horizonString, WithLock lock, const bool hasValidConfig) const { - uassert( - kQuiesceModeShutdownStatus.code(), kQuiesceModeShutdownStatus.reason(), !_inQuiesceMode); + + uassert(ShutdownInProgressQuiesceInfo(_calculateRemainingQuiesceTimeMillis()), + kQuiesceModeShutdownMessage, + !_inQuiesceMode); if (!hasValidConfig) { auto response = std::make_shared<IsMasterResponse>(); @@ -2195,8 +2207,9 @@ ReplicationCoordinatorImpl::_getIsMasterResponseFuture( boost::optional<StringData> horizonString, boost::optional<TopologyVersion> clientTopologyVersion) { - uassert( - kQuiesceModeShutdownStatus.code(), kQuiesceModeShutdownStatus.reason(), !_inQuiesceMode); + uassert(ShutdownInProgressQuiesceInfo(_calculateRemainingQuiesceTimeMillis()), + kQuiesceModeShutdownMessage, + !_inQuiesceMode); const bool hasValidConfig = horizonString != boost::none; @@ -3986,7 +3999,9 @@ void ReplicationCoordinatorImpl::_fulfillTopologyChangePromise(WithLock lock) { iter != _horizonToTopologyChangePromiseMap.end(); iter++) { if (_inQuiesceMode) { - iter->second->setError(kQuiesceModeShutdownStatus); + iter->second->setError( + Status(ShutdownInProgressQuiesceInfo(_calculateRemainingQuiesceTimeMillis()), + kQuiesceModeShutdownMessage)); } else { StringData horizonString = iter->first; auto response = 
_makeIsMasterResponse(horizonString, lock, hasValidConfig); diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index 6c39ff5d294..e21f7f8a434 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -106,7 +106,7 @@ public: virtual void enterTerminalShutdown() override; - virtual bool enterQuiesceModeIfSecondary() override; + virtual bool enterQuiesceModeIfSecondary(Milliseconds quiesceTime) override; virtual bool inQuiesceMode() const override; @@ -1180,6 +1180,11 @@ private: StatusWith<int> myIndex); /** + * Calculates the time (in millis) left in quiesce mode and converts the value to int64. + */ + long long _calculateRemainingQuiesceTimeMillis() const; + + /** * Fills an IsMasterResponse with the appropriate replication related fields. horizonString * should be passed in if hasValidConfig is true. */ @@ -1651,6 +1656,9 @@ private: // If we're in quiesce mode. If true, we'll respond to isMaster requests with ok:0. bool _inQuiesceMode = false; // (M) + // The deadline until which quiesce mode will last. + Date_t _quiesceDeadline; // (M) + // The cached value of the 'counter' field in the server's TopologyVersion. 
AtomicWord<int64_t> _cachedTopologyVersionCounter; // (S) }; diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index 229505dce63..e8467ca278f 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -62,6 +62,7 @@ #include "mongo/db/repl/update_position_args.h" #include "mongo/db/server_options.h" #include "mongo/db/service_context.h" +#include "mongo/db/shutdown_in_progress_quiesce_info.h" #include "mongo/db/write_concern_options.h" #include "mongo/executor/network_interface_mock.h" #include "mongo/logv2/log.h" @@ -3309,7 +3310,7 @@ TEST_F(ReplCoordTest, IsMasterReturnsErrorOnEnteringQuiesceMode) { // Ensure that awaitIsMasterResponse() is called before entering quiesce mode. waitForIsMasterFailPoint->waitForTimesEntered(timesEnteredFailPoint + 1); - ASSERT(getReplCoord()->enterQuiesceModeIfSecondary()); + ASSERT(getReplCoord()->enterQuiesceModeIfSecondary(Milliseconds(0))); ASSERT_EQUALS(currentTopologyVersion.getCounter() + 1, getTopoCoord().getTopologyVersion().getCounter()); // Check that the cached topologyVersion counter was updated correctly. @@ -3355,7 +3356,7 @@ TEST_F(ReplCoordTest, IsMasterReturnsErrorOnEnteringQuiesceModeAfterWaitingTimes // Ensure that waiting for a topology change timed out before entering quiesce mode. failPoint->waitForTimesEntered(timesEnteredFailPoint + 1); - ASSERT(getReplCoord()->enterQuiesceModeIfSecondary()); + ASSERT(getReplCoord()->enterQuiesceModeIfSecondary(Milliseconds(0))); failPoint->setMode(FailPoint::off, 0); // Advance the clock so that pauseWhileSet() will wake up. 
@@ -3380,7 +3381,7 @@ TEST_F(ReplCoordTest, IsMasterReturnsErrorInQuiesceMode) { ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); auto currentTopologyVersion = getTopoCoord().getTopologyVersion(); - ASSERT(getReplCoord()->enterQuiesceModeIfSecondary()); + ASSERT(getReplCoord()->enterQuiesceModeIfSecondary(Milliseconds(1000))); ASSERT_EQUALS(currentTopologyVersion.getCounter() + 1, getTopoCoord().getTopologyVersion().getCounter()); // Check that the cached topologyVersion counter was updated correctly. @@ -3419,14 +3420,68 @@ TEST_F(ReplCoordTest, IsMasterReturnsErrorInQuiesceMode) { getReplCoord()->awaitIsMasterResponse(opCtx.get(), {}, boost::none, boost::none), AssertionException, ErrorCodes::ShutdownInProgress); + + // Check that status includes an extraErrorInfo class. Since we did not advance the clock, we + // should still have the full quiesceTime as our remaining quiesceTime. + try { + getReplCoord()->awaitIsMasterResponse(opCtx.get(), {}, currentTopologyVersion, deadline); + } catch (const DBException& ex) { + ASSERT(ex.extraInfo()); + ASSERT(ex.extraInfo<ShutdownInProgressQuiesceInfo>()); + ASSERT_EQ(ex.extraInfo<ShutdownInProgressQuiesceInfo>()->getRemainingQuiesceTimeMillis(), + 1000); + } +} + +TEST_F(ReplCoordTest, QuiesceModeErrorsReturnAccurateRemainingQuiesceTime) { + init(); + assertStartSuccess(BSON("_id" + << "mySet" + << "version" << 1 << "members" + << BSON_ARRAY(BSON("host" + << "node1:12345" + << "_id" << 0))), + HostAndPort("node1", 12345)); + ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); + + auto currentTopologyVersion = getTopoCoord().getTopologyVersion(); + auto totalQuiesceTime = Milliseconds(1000); + ASSERT(getReplCoord()->enterQuiesceModeIfSecondary(totalQuiesceTime)); + ASSERT_EQUALS(currentTopologyVersion.getCounter() + 1, + getTopoCoord().getTopologyVersion().getCounter()); + // Check that the cached topologyVersion counter was updated correctly. 
+ ASSERT_EQUALS(getTopoCoord().getTopologyVersion().getCounter(), + getReplCoord()->getTopologyVersion().getCounter()); + + auto opCtx = makeOperationContext(); + auto maxAwaitTime = Milliseconds(5000); + auto deadline = getNet()->now() + maxAwaitTime; + auto halfwayThroughQuiesce = getNet()->now() + totalQuiesceTime / 2; + + getNet()->enterNetwork(); + // Advance the clock halfway to the quiesce deadline. + getNet()->advanceTime(halfwayThroughQuiesce); + getNet()->exitNetwork(); + + // Check that status includes an extraErrorInfo class. Since we advanced the clock halfway to + // the quiesce deadline, we should have half of the total quiesceTime left, 500 ms. + try { + getReplCoord()->awaitIsMasterResponse(opCtx.get(), {}, currentTopologyVersion, deadline); + } catch (const DBException& ex) { + ASSERT(ex.extraInfo()); + ASSERT(ex.extraInfo<ShutdownInProgressQuiesceInfo>()); + ASSERT_EQ(ex.extraInfo<ShutdownInProgressQuiesceInfo>()->getRemainingQuiesceTimeMillis(), + 500); + } } + TEST_F(ReplCoordTest, DoNotEnterQuiesceModeInStatesOtherThanSecondary) { init(); // Do not enter quiesce mode in state RS_STARTUP. ASSERT_TRUE(getReplCoord()->getMemberState().startup()); - ASSERT_FALSE(getReplCoord()->enterQuiesceModeIfSecondary()); + ASSERT_FALSE(getReplCoord()->enterQuiesceModeIfSecondary(Milliseconds(0))); assertStartSuccess(BSON("_id" << "mySet" @@ -3441,7 +3496,7 @@ TEST_F(ReplCoordTest, DoNotEnterQuiesceModeInStatesOtherThanSecondary) { // Do not enter quiesce mode in state RS_STARTUP2. ASSERT_TRUE(getReplCoord()->getMemberState().startup2()); - ASSERT_FALSE(getReplCoord()->enterQuiesceModeIfSecondary()); + ASSERT_FALSE(getReplCoord()->enterQuiesceModeIfSecondary(Milliseconds(0))); // Become primary. ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); @@ -3451,7 +3506,7 @@ TEST_F(ReplCoordTest, DoNotEnterQuiesceModeInStatesOtherThanSecondary) { ASSERT(getReplCoord()->getMemberState().primary()); // Do not enter quiesce mode in state RS_PRIMARY. 
- ASSERT_FALSE(getReplCoord()->enterQuiesceModeIfSecondary()); + ASSERT_FALSE(getReplCoord()->enterQuiesceModeIfSecondary(Milliseconds(0))); } TEST_F(ReplCoordTest, IsMasterReturnsErrorInQuiesceModeWhenNodeIsRemoved) { @@ -3471,7 +3526,7 @@ TEST_F(ReplCoordTest, IsMasterReturnsErrorInQuiesceModeWhenNodeIsRemoved) { // Enter quiesce mode. Test that we increment the topology version. auto topologyVersionBeforeQuiesceMode = getTopoCoord().getTopologyVersion(); - ASSERT(getReplCoord()->enterQuiesceModeIfSecondary()); + ASSERT(getReplCoord()->enterQuiesceModeIfSecondary(Milliseconds(0))); auto topologyVersionAfterQuiesceMode = getTopoCoord().getTopologyVersion(); ASSERT_EQUALS(topologyVersionBeforeQuiesceMode.getCounter() + 1, topologyVersionAfterQuiesceMode.getCounter()); diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp index 4eb5cf31cf6..acb67df79d9 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.cpp +++ b/src/mongo/db/repl/replication_coordinator_mock.cpp @@ -81,7 +81,7 @@ void ReplicationCoordinatorMock::enterTerminalShutdown() { // TODO } -bool ReplicationCoordinatorMock::enterQuiesceModeIfSecondary() { +bool ReplicationCoordinatorMock::enterQuiesceModeIfSecondary(Milliseconds quiesceTime) { // TODO return true; } diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h index fbfb8c6887a..bc15f442e31 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.h +++ b/src/mongo/db/repl/replication_coordinator_mock.h @@ -70,7 +70,7 @@ public: virtual void enterTerminalShutdown(); - virtual bool enterQuiesceModeIfSecondary(); + virtual bool enterQuiesceModeIfSecondary(Milliseconds quieseTime); virtual bool inQuiesceMode() const; diff --git a/src/mongo/db/repl/replication_coordinator_noop.cpp b/src/mongo/db/repl/replication_coordinator_noop.cpp index 18af1802c8d..e28f87c88f0 100644 --- 
a/src/mongo/db/repl/replication_coordinator_noop.cpp +++ b/src/mongo/db/repl/replication_coordinator_noop.cpp @@ -41,7 +41,7 @@ void ReplicationCoordinatorNoOp::startup(OperationContext* opCtx) {} void ReplicationCoordinatorNoOp::enterTerminalShutdown() {} -bool ReplicationCoordinatorNoOp::enterQuiesceModeIfSecondary() { +bool ReplicationCoordinatorNoOp::enterQuiesceModeIfSecondary(Milliseconds quiesceTime) { MONGO_UNREACHABLE; } diff --git a/src/mongo/db/repl/replication_coordinator_noop.h b/src/mongo/db/repl/replication_coordinator_noop.h index 901873341f5..d1a7cc7937f 100644 --- a/src/mongo/db/repl/replication_coordinator_noop.h +++ b/src/mongo/db/repl/replication_coordinator_noop.h @@ -51,7 +51,7 @@ public: void enterTerminalShutdown() final; - bool enterQuiesceModeIfSecondary() final; + bool enterQuiesceModeIfSecondary(Milliseconds quiesceTime) final; bool inQuiesceMode() const final; diff --git a/src/mongo/db/repl/topology_version_observer_test.cpp b/src/mongo/db/repl/topology_version_observer_test.cpp index a3ba3717eac..3346974649b 100644 --- a/src/mongo/db/repl/topology_version_observer_test.cpp +++ b/src/mongo/db/repl/topology_version_observer_test.cpp @@ -220,7 +220,7 @@ TEST_F(TopologyVersionObserverTest, HandleQuiesceMode) { // Enter quiesce mode in the replication coordinator to make shutdown errors come from // awaitIsMasterResponseFuture()/getIsMasterResponseFuture(). auto opCtx = makeOperationContext(); - getReplCoord()->enterQuiesceModeIfSecondary(); + getReplCoord()->enterQuiesceModeIfSecondary(Milliseconds(0)); getNet()->enterNetwork(); getNet()->advanceTime(getNet()->now() + sleepTime); diff --git a/src/mongo/db/shutdown_in_progress_quiesce_info.cpp b/src/mongo/db/shutdown_in_progress_quiesce_info.cpp new file mode 100644 index 00000000000..75cd4fe7204 --- /dev/null +++ b/src/mongo/db/shutdown_in_progress_quiesce_info.cpp @@ -0,0 +1,54 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/shutdown_in_progress_quiesce_info.h" + +#include "mongo/base/init.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/util/assert_util.h" + +namespace mongo { +namespace { + +MONGO_INIT_REGISTER_ERROR_EXTRA_INFO(ShutdownInProgressQuiesceInfo); + +} // namespace + +void ShutdownInProgressQuiesceInfo::serialize(BSONObjBuilder* bob) const { + bob->append("remainingQuiesceTimeMillis", _remainingQuiesceTimeMillis); +} + +std::shared_ptr<const ErrorExtraInfo> ShutdownInProgressQuiesceInfo::parse(const BSONObj& obj) { + return std::make_shared<ShutdownInProgressQuiesceInfo>( + obj["remainingQuiesceTimeMillis"].safeNumberLong()); +} + +} // namespace mongo
\ No newline at end of file diff --git a/src/mongo/db/shutdown_in_progress_quiesce_info.h b/src/mongo/db/shutdown_in_progress_quiesce_info.h new file mode 100644 index 00000000000..000d286230d --- /dev/null +++ b/src/mongo/db/shutdown_in_progress_quiesce_info.h @@ -0,0 +1,61 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/base/error_extra_info.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonobjbuilder.h" + +namespace mongo { + +/** + * Represents an error returned from a mongod or a mongos when it is in quiesce mode. 
The + * error information defined here includes the remaining time the node has left + * in quiesce mode. + */ +class ShutdownInProgressQuiesceInfo final : public ErrorExtraInfo { +public: + static constexpr auto code = ErrorCodes::ShutdownInProgress; + + ShutdownInProgressQuiesceInfo(long long remainingQuiesceTimeMillis) + : _remainingQuiesceTimeMillis(remainingQuiesceTimeMillis) {} + + const auto& getRemainingQuiesceTimeMillis() const { + return _remainingQuiesceTimeMillis; + } + + void serialize(BSONObjBuilder* bob) const override; + static std::shared_ptr<const ErrorExtraInfo> parse(const BSONObj&); + +private: + long long _remainingQuiesceTimeMillis = 0; +}; + +} // namespace mongo
\ No newline at end of file diff --git a/src/mongo/embedded/replication_coordinator_embedded.cpp b/src/mongo/embedded/replication_coordinator_embedded.cpp index f1643f0e0ba..1a788a500be 100644 --- a/src/mongo/embedded/replication_coordinator_embedded.cpp +++ b/src/mongo/embedded/replication_coordinator_embedded.cpp @@ -51,7 +51,7 @@ void ReplicationCoordinatorEmbedded::startup(OperationContext* opCtx) {} void ReplicationCoordinatorEmbedded::enterTerminalShutdown() {} -bool ReplicationCoordinatorEmbedded::enterQuiesceModeIfSecondary() { +bool ReplicationCoordinatorEmbedded::enterQuiesceModeIfSecondary(Milliseconds quiesceTime) { return true; } diff --git a/src/mongo/embedded/replication_coordinator_embedded.h b/src/mongo/embedded/replication_coordinator_embedded.h index f8bd0dbc15c..e43a2f00f8f 100644 --- a/src/mongo/embedded/replication_coordinator_embedded.h +++ b/src/mongo/embedded/replication_coordinator_embedded.h @@ -49,7 +49,7 @@ public: void enterTerminalShutdown() override; - bool enterQuiesceModeIfSecondary() override; + bool enterQuiesceModeIfSecondary(Milliseconds quiesceTime) override; bool inQuiesceMode() const override; diff --git a/src/mongo/s/SConscript b/src/mongo/s/SConscript index 7745088ead9..9c50eb8fd00 100644 --- a/src/mongo/s/SConscript +++ b/src/mongo/s/SConscript @@ -356,6 +356,7 @@ env.Library( 'mongos_topology_coordinator.cpp', ], LIBDEPS=[ + '$BUILD_DIR/mongo/db/common', '$BUILD_DIR/mongo/transport/transport_layer_common', '$BUILD_DIR/mongo/util/fail_point', '$BUILD_DIR/mongo/rpc/metadata', diff --git a/src/mongo/s/mongos_topology_coordinator.cpp b/src/mongo/s/mongos_topology_coordinator.cpp index 9d762c8d0c1..d1c27a5a3ac 100644 --- a/src/mongo/s/mongos_topology_coordinator.cpp +++ b/src/mongo/s/mongos_topology_coordinator.cpp @@ -33,6 +33,7 @@ #include "mongo/db/client.h" #include "mongo/db/service_context.h" +#include "mongo/db/shutdown_in_progress_quiesce_info.h" #include "mongo/s/mongos_topology_coordinator.h" #include 
"mongo/util/fail_point.h" @@ -75,11 +76,11 @@ StatusOrStatusWith<T> futureGetNoThrowWithDeadline(OperationContext* opCtx, } /** - * ShutdownInProgress error + * ShutdownInProgress error message */ -const Status kQuiesceModeShutdownStatus = - Status(ErrorCodes::ShutdownInProgress, "Mongos is in quiesce mode and will shut down"); +constexpr StringData kQuiesceModeShutdownMessage = + "Mongos is in quiesce mode and will shut down"_sd; } // namespace @@ -93,13 +94,23 @@ MongosTopologyCoordinator::MongosTopologyCoordinator() _inQuiesceMode(false), _promise(std::make_shared<SharedPromise<std::shared_ptr<const MongosIsMasterResponse>>>()) {} +long long MongosTopologyCoordinator::_calculateRemainingQuiesceTimeMillis() const { + auto preciseClock = getGlobalServiceContext()->getPreciseClockSource(); + auto remainingQuiesceTimeMillis = + std::max(Milliseconds::zero(), _quiesceDeadline - preciseClock->now()); + // Turn remainingQuiesceTimeMillis into an int64 so that it's a supported BSONElement. + long long remainingQuiesceTimeLong = durationCount<Milliseconds>(remainingQuiesceTimeMillis); + return remainingQuiesceTimeLong; +} + std::shared_ptr<MongosIsMasterResponse> MongosTopologyCoordinator::_makeIsMasterResponse( WithLock lock) const { // It's possible for us to transition to Quiesce Mode after an isMaster request timed out. // Check that we are not in Quiesce Mode before returning a response to avoid responding with // a higher topology version, but no indication that we are shutting down. 
- uassert( - kQuiesceModeShutdownStatus.code(), kQuiesceModeShutdownStatus.reason(), !_inQuiesceMode); + uassert(ShutdownInProgressQuiesceInfo(_calculateRemainingQuiesceTimeMillis()), + kQuiesceModeShutdownMessage, + !_inQuiesceMode); auto response = std::make_shared<MongosIsMasterResponse>(_topologyVersion); return response; @@ -113,8 +124,9 @@ std::shared_ptr<const MongosIsMasterResponse> MongosTopologyCoordinator::awaitIs // Fail all new isMaster requests with ShutdownInProgress if we've transitioned to Quiesce // Mode. - uassert( - kQuiesceModeShutdownStatus.code(), kQuiesceModeShutdownStatus.reason(), !_inQuiesceMode); + uassert(ShutdownInProgressQuiesceInfo(_calculateRemainingQuiesceTimeMillis()), + kQuiesceModeShutdownMessage, + !_inQuiesceMode); // Respond immediately if: // (1) There is no clientTopologyVersion, which indicates that the client is not using @@ -182,12 +194,14 @@ void MongosTopologyCoordinator::enterQuiesceModeAndWait(OperationContext* opCtx, { stdx::lock_guard lk(_mutex); _inQuiesceMode = true; + _quiesceDeadline = getGlobalServiceContext()->getPreciseClockSource()->now() + quiesceTime; // Increment the topology version and respond to any waiting isMaster request with an error. auto counter = _topologyVersion.getCounter(); _topologyVersion.setCounter(counter + 1); _promise->setError( - {ErrorCodes::ShutdownInProgress, "Mongos is in quiesce mode and will shut down"}); + Status(ShutdownInProgressQuiesceInfo(_calculateRemainingQuiesceTimeMillis()), + kQuiesceModeShutdownMessage)); // Reset counter to 0 since we will respond to all waiting isMaster requests with an error. // All new isMaster requests will immediately fail with ShutdownInProgress. 
@@ -200,7 +214,7 @@ void MongosTopologyCoordinator::enterQuiesceModeAndWait(OperationContext* opCtx, } LOGV2(4695701, "Entering quiesce mode for mongos shutdown", "quiesceTime"_attr = quiesceTime); - opCtx->sleepFor(quiesceTime); + opCtx->sleepUntil(_quiesceDeadline); LOGV2(4695702, "Exiting quiesce mode for mongos shutdown"); } diff --git a/src/mongo/s/mongos_topology_coordinator.h b/src/mongo/s/mongos_topology_coordinator.h index 5278bf50c23..bb04306f33d 100644 --- a/src/mongo/s/mongos_topology_coordinator.h +++ b/src/mongo/s/mongos_topology_coordinator.h @@ -88,6 +88,11 @@ private: SharedPromise<std::shared_ptr<const MongosIsMasterResponse>>; /** + * Calculates the time (in millis) left in quiesce mode and converts the value to int64. + */ + long long _calculateRemainingQuiesceTimeMillis() const; + + /** * Helper for constructing a MongosIsMasterResponse. **/ std::shared_ptr<MongosIsMasterResponse> _makeIsMasterResponse(WithLock) const; @@ -107,6 +112,9 @@ private: // True if we're in quiesce mode. If true, we'll respond to isMaster requests with ok:0. bool _inQuiesceMode; // (M) + // The deadline until which quiesce mode will last. + Date_t _quiesceDeadline; // (M) + // The promise waited on by awaitable isMaster requests on mongos. std::shared_ptr<SharedPromiseOfMongosIsMasterResponse> _promise; // (M) }; |