diff options
author | Jiawei Yang <jiawei.yang@mongodb.com> | 2023-04-25 22:43:17 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-26 04:08:30 +0000 |
commit | 606e34054ef33e59b78715263b125ff7ebea1394 (patch) | |
tree | 85f8e6b1256096da4facc919bc6123db693a437f | |
parent | 5c1f588bfa4ed2edbeb3abbb26e952e08641da14 (diff) | |
download | mongo-606e34054ef33e59b78715263b125ff7ebea1394.tar.gz |
SERVER-70127 change system operations to be killable by default
131 files changed, 687 insertions, 343 deletions
diff --git a/src/mongo/db/auth/user_cache_invalidator_job.cpp b/src/mongo/db/auth/user_cache_invalidator_job.cpp index d7c599f1fd9..caa0939b3e0 100644 --- a/src/mongo/db/auth/user_cache_invalidator_job.cpp +++ b/src/mongo/db/auth/user_cache_invalidator_job.cpp @@ -145,7 +145,9 @@ void UserCacheInvalidator::start(ServiceContext* serviceCtx, OperationContext* o PeriodicRunner::PeriodicJob job( "UserCacheInvalidator", [serviceCtx](Client* client) { getUserCacheInvalidator(serviceCtx)->run(); }, - loadInterval()); + loadInterval(), + // TODO(SERVER-74660): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/); invalidator->_job = std::make_unique<PeriodicJobAnchor>(periodicRunner->makeJob(std::move(job))); diff --git a/src/mongo/db/catalog/collection_catalog_bm.cpp b/src/mongo/db/catalog/collection_catalog_bm.cpp index 0d033653188..a78b9d1e3d6 100644 --- a/src/mongo/db/catalog/collection_catalog_bm.cpp +++ b/src/mongo/db/catalog/collection_catalog_bm.cpp @@ -107,6 +107,12 @@ void BM_CollectionCatalogWriteBatchedWithGlobalExclusiveLock(benchmark::State& s ThreadClient threadClient(serviceContext); ServiceContext::UniqueOperationContext opCtx = threadClient->makeOperationContext(); + // TODO(SERVER-74657): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*threadClient.get()); + threadClient.get()->setSystemOperationUnkillableByStepdown(lk); + } + createCollections(opCtx.get(), state.range(0)); Lock::GlobalLock globalLk(opCtx.get(), MODE_X); diff --git a/src/mongo/db/change_collection_expired_documents_remover.cpp b/src/mongo/db/change_collection_expired_documents_remover.cpp index 083256e8859..5dc4d1fe7e6 100644 --- a/src/mongo/db/change_collection_expired_documents_remover.cpp +++ b/src/mongo/db/change_collection_expired_documents_remover.cpp @@ -174,7 +174,11 @@ public: ChangeCollectionExpiredDocumentsRemover(ServiceContext* serviceContext) { const auto period = Seconds{gChangeCollectionExpiredDocumentsRemoverJobSleepSeconds.load()}; _jobAnchor = serviceContext->getPeriodicRunner()->makeJob( - {"ChangeCollectionExpiredDocumentsRemover", removeExpiredDocuments, period}); + {"ChangeCollectionExpiredDocumentsRemover", + removeExpiredDocuments, + period, + // TODO(SERVER-74662): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/}); _jobAnchor.start(); } diff --git a/src/mongo/db/client.h b/src/mongo/db/client.h index 39c9b4703e4..1610f8ff2e8 100644 --- a/src/mongo/db/client.h +++ b/src/mongo/db/client.h @@ -41,6 +41,7 @@ #include "mongo/db/namespace_string.h" #include "mongo/db/service_context.h" +#include "mongo/logv2/log.h" #include "mongo/platform/atomic_word.h" #include "mongo/platform/random.h" #include "mongo/stdx/thread.h" @@ -194,15 +195,15 @@ public: } /** - * Used to mark system operations that are allowed to be killed by the stepdown process. This - * should only be called once per Client and only from system connections. The Client should be - * locked by the caller. + * Used to mark system operations that are not allowed to be killed by the stepdown process. + * This should only be called once per Client and only from system connections. The Client + * should be locked by the caller. */ - void setSystemOperationKillableByStepdown(WithLock) { + void setSystemOperationUnkillableByStepdown(WithLock) { // This can only be changed once for system operations. invariant(isFromSystemConnection()); - invariant(!_systemOperationKillable); - _systemOperationKillable = true; + invariant(_systemOperationKillable); + _systemOperationKillable = false; } /** @@ -294,7 +295,7 @@ private: OperationContext* _opCtx = nullptr; // If the active system client operation is allowed to be killed. - bool _systemOperationKillable = false; + bool _systemOperationKillable = true; PseudoRandom _prng; diff --git a/src/mongo/db/clientcursor.cpp b/src/mongo/db/clientcursor.cpp index db7a5253ef3..90c3a4581cc 100644 --- a/src/mongo/db/clientcursor.cpp +++ b/src/mongo/db/clientcursor.cpp @@ -357,6 +357,13 @@ public: void run() { ThreadClient tc("clientcursormon", getGlobalServiceContext()); + + // TODO(SERVER-74662): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + while (!globalInShutdownDeprecated()) { { const ServiceContext::UniqueOperationContext opCtx = cc().makeOperationContext(); diff --git a/src/mongo/db/commands/create_indexes_cmd.cpp b/src/mongo/db/commands/create_indexes_cmd.cpp index be6ef5c15e2..ce94c92662a 100644 --- a/src/mongo/db/commands/create_indexes_cmd.cpp +++ b/src/mongo/db/commands/create_indexes_cmd.cpp @@ -640,12 +640,6 @@ CreateIndexesReply runCreateIndexesWithCoordinator(OperationContext* opCtx, // The current OperationContext may be interrupted, which would prevent us from // taking locks. Use a new OperationContext to abort the index build. auto newClient = opCtx->getServiceContext()->makeClient("abort-index-build"); - - { - stdx::lock_guard<Client> lk(*newClient.get()); - newClient.get()->setSystemOperationKillableByStepdown(lk); - } - AlternativeClientRegion acr(newClient); const auto abortCtx = cc().makeOperationContext(); diff --git a/src/mongo/db/commands/dbcheck.cpp b/src/mongo/db/commands/dbcheck.cpp index 312431536a2..e329d1d781a 100644 --- a/src/mongo/db/commands/dbcheck.cpp +++ b/src/mongo/db/commands/dbcheck.cpp @@ -302,12 +302,6 @@ protected: virtual void run() override { // Every dbCheck runs in its own client. ThreadClient tc(name(), getGlobalServiceContext()); - - { - stdx::lock_guard<Client> lk(*tc.get()); - tc.get()->setSystemOperationKillableByStepdown(lk); - } - auto uniqueOpCtx = tc->makeOperationContext(); auto opCtx = uniqueOpCtx.get(); diff --git a/src/mongo/db/commands/fsync.cpp b/src/mongo/db/commands/fsync.cpp index 795adf38540..581501ea841 100644 --- a/src/mongo/db/commands/fsync.cpp +++ b/src/mongo/db/commands/fsync.cpp @@ -356,6 +356,12 @@ void FSyncLockThread::run() { stdx::lock_guard<SimpleMutex> lkf(filesLockedFsync); stdx::unique_lock<Latch> stateLock(fsyncStateMutex); + // TODO(SERVER-74657): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + invariant(fsyncCmd.getLockCount_inLock() == 1); try { diff --git a/src/mongo/db/commands/user_management_commands.cpp b/src/mongo/db/commands/user_management_commands.cpp index e6b6a3cdae0..e93ae15ab1e 100644 --- a/src/mongo/db/commands/user_management_commands.cpp +++ b/src/mongo/db/commands/user_management_commands.cpp @@ -731,6 +731,13 @@ public: // Subclient used by transaction operations. _client = opCtx->getServiceContext()->makeClient(forCommand.toString()); + + // TODO(SERVER-74660): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*_client.get()); + _client.get()->setSystemOperationUnkillableByStepdown(lk); + } + auto as = AuthorizationSession::get(_client.get()); if (as) { as->grantInternalAuthorization(_client.get()); diff --git a/src/mongo/db/concurrency/deferred_writer.cpp b/src/mongo/db/concurrency/deferred_writer.cpp index 0ec030345ea..558f3e282cc 100644 --- a/src/mongo/db/concurrency/deferred_writer.cpp +++ b/src/mongo/db/concurrency/deferred_writer.cpp @@ -151,9 +151,6 @@ void DeferredWriter::startup(std::string workerName) { options.maxThreads = 1; options.onCreateThread = [](const std::string& name) { Client::initThread(name); - - stdx::lock_guard<Client> lk(cc()); - cc().setSystemOperationKillableByStepdown(lk); }; _pool = std::make_unique<ThreadPool>(options); _pool->startup(); diff --git a/src/mongo/db/exec/sbe/stages/exchange.cpp b/src/mongo/db/exec/sbe/stages/exchange.cpp index 02b5c3ce7d0..f0642818710 100644 --- a/src/mongo/db/exec/sbe/stages/exchange.cpp +++ b/src/mongo/db/exec/sbe/stages/exchange.cpp @@ -46,6 +46,12 @@ MONGO_INITIALIZER(s_globalThreadPool)(InitializerContext* context) { options.maxThreads = 128; options.onCreateThread = [](const std::string& name) { Client::initThread(name); + + // TODO(SERVER-74662): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } }; s_globalThreadPool = std::make_unique<ThreadPool>(options); s_globalThreadPool->startup(); diff --git a/src/mongo/db/fle_crud.cpp b/src/mongo/db/fle_crud.cpp index a37b7f8169b..7c41c7374d7 100644 --- a/src/mongo/db/fle_crud.cpp +++ b/src/mongo/db/fle_crud.cpp @@ -1456,6 +1456,13 @@ BSONObj FLEQueryInterfaceImpl::getById(const NamespaceString& nss, BSONElement e uint64_t FLEQueryInterfaceImpl::countDocuments(const NamespaceString& nss) { // Since count() does not work in a transaction, call count() by bypassing the transaction api auto client = _serviceContext->makeClient("SEP-int-fle-crud"); + + // TODO(SERVER-74660): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*client.get()); + client.get()->setSystemOperationUnkillableByStepdown(lk); + } + AlternativeClientRegion clientRegion(client); auto opCtx = cc().makeOperationContext(); auto as = AuthorizationSession::get(cc()); @@ -1794,6 +1801,13 @@ std::vector<std::vector<FLEEdgeCountInfo>> FLETagNoTXNQuery::getTags( // Pop off the current op context so we can get a fresh set of read concern settings auto client = _opCtx->getServiceContext()->makeClient("FLETagNoTXNQuery"); + + // TODO(SERVER-74660): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*client.get()); + client.get()->setSystemOperationUnkillableByStepdown(lk); + } + AlternativeClientRegion clientRegion(client); auto opCtx = cc().makeOperationContext(); auto as = AuthorizationSession::get(cc()); diff --git a/src/mongo/db/free_mon/free_mon_mongod.cpp b/src/mongo/db/free_mon/free_mon_mongod.cpp index bf247415dc8..05005bf2c89 100644 --- a/src/mongo/db/free_mon/free_mon_mongod.cpp +++ b/src/mongo/db/free_mon/free_mon_mongod.cpp @@ -81,6 +81,10 @@ auto makeTaskExecutor(ServiceContext* /*serviceContext*/) { tpOptions.maxThreads = 2; tpOptions.onCreateThread = [](const std::string& threadName) { Client::initThread(threadName.c_str()); + + // TODO(SERVER-74659): Please revisit if this thread could be made killable. + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); }; return std::make_unique<executor::ThreadPoolTaskExecutor>( std::make_unique<ThreadPool>(tpOptions), executor::makeNetworkInterface("FreeMonNet")); diff --git a/src/mongo/db/free_mon/free_mon_processor.cpp b/src/mongo/db/free_mon/free_mon_processor.cpp index d42dc473d14..872b5d436f4 100644 --- a/src/mongo/db/free_mon/free_mon_processor.cpp +++ b/src/mongo/db/free_mon/free_mon_processor.cpp @@ -165,6 +165,12 @@ void FreeMonProcessor::run() { Client::initThread("FreeMonProcessor"); Client* client = &cc(); + // TODO(SERVER-74659): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*client); + client->setSystemOperationUnkillableByStepdown(lk); + } + while (true) { auto item = _queue.dequeue(client->getServiceContext()->getPreciseClockSource()); if (!item.has_value()) { diff --git a/src/mongo/db/ftdc/controller.cpp b/src/mongo/db/ftdc/controller.cpp index c2c409be1d8..6ad4c10784b 100644 --- a/src/mongo/db/ftdc/controller.cpp +++ b/src/mongo/db/ftdc/controller.cpp @@ -196,6 +196,12 @@ void FTDCController::doLoop() noexcept { Client::initThread(kFTDCThreadName); Client* client = &cc(); + // TODO(SERVER-74659): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*client); + client->setSystemOperationUnkillableByStepdown(lk); + } + // Update config { stdx::lock_guard<Latch> lock(_mutex); diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 86cf52d6f56..e95b94c33d3 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -1652,7 +1652,6 @@ void IndexBuildsCoordinator::onStepUp(OperationContext* opCtx) { PromiseAndFuture<void> promiseAndFuture; _stepUpThread = stdx::thread([this, &promiseAndFuture] { Client::initThread("IndexBuildsCoordinator-StepUp"); - auto threadCtx = Client::getCurrent()->makeOperationContext(); threadCtx->setAlwaysInterruptAtStepDownOrUp_UNSAFE(); promiseAndFuture.promise.emplaceValue(); @@ -2549,6 +2548,13 @@ namespace { template <typename Func> void runOnAlternateContext(OperationContext* opCtx, std::string name, Func func) { auto newClient = opCtx->getServiceContext()->makeClient(name); + + // TODO(SERVER-74657): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*newClient.get()); + newClient.get()->setSystemOperationUnkillableByStepdown(lk); + } + AlternativeClientRegion acr(newClient); const auto newCtx = cc().makeOperationContext(); func(newCtx.get()); diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp index 74aa4eb3265..5a5773b1877 100644 --- a/src/mongo/db/index_builds_coordinator_mongod.cpp +++ b/src/mongo/db/index_builds_coordinator_mongod.cpp @@ -93,6 +93,9 @@ ThreadPool::Options makeDefaultThreadPoolOptions() { // Ensure all threads have a client. options.onCreateThread = [](const std::string& threadName) { Client::initThread(threadName.c_str()); + + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); }; return options; diff --git a/src/mongo/db/introspect.cpp b/src/mongo/db/introspect.cpp index b5aecf38196..81e6b1aaca0 100644 --- a/src/mongo/db/introspect.cpp +++ b/src/mongo/db/introspect.cpp @@ -95,6 +95,13 @@ void profile(OperationContext* opCtx, NetworkOp op) { // killed or timed out. Those are the case we want to have profiling data. auto newClient = opCtx->getServiceContext()->makeClient("profiling"); auto newCtx = newClient->makeOperationContext(); + + // TODO(SERVER-74657): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*newClient.get()); + newClient.get()->setSystemOperationUnkillableByStepdown(lk); + } + // We swap the lockers as that way we preserve locks held in transactions and any other // options set for the locker like maxLockTimeout. auto oldLocker = opCtx->getClient()->swapLockState( diff --git a/src/mongo/db/keys_collection_manager.cpp b/src/mongo/db/keys_collection_manager.cpp index 31e97291948..b03f6417afb 100644 --- a/src/mongo/db/keys_collection_manager.cpp +++ b/src/mongo/db/keys_collection_manager.cpp @@ -240,12 +240,6 @@ void KeysCollectionManager::PeriodicRunner::_doPeriodicRefresh(ServiceContext* s std::string threadName, Milliseconds refreshInterval) { ThreadClient tc(threadName, service); - - { - stdx::lock_guard<Client> lk(*tc.get()); - tc.get()->setSystemOperationKillableByStepdown(lk); - } - ON_BLOCK_EXIT([this]() mutable { _hasSeenKeys.store(false); }); unsigned errorCount = 0; diff --git a/src/mongo/db/mongod_main.cpp b/src/mongo/db/mongod_main.cpp index e925e51cd42..291856a40df 100644 --- a/src/mongo/db/mongod_main.cpp +++ b/src/mongo/db/mongod_main.cpp @@ -421,6 +421,12 @@ MONGO_FAIL_POINT_DEFINE(shutdownAtStartup); ExitCode _initAndListen(ServiceContext* serviceContext, int listenPort) { Client::initThread("initandlisten"); + // TODO(SERVER-74659): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + serviceContext->setFastClockSource(FastClockSourceFactory::create(Milliseconds(10))); DBDirectClientFactory::get(serviceContext).registerImplementation([](OperationContext* opCtx) { @@ -1159,6 +1165,9 @@ auto makeReplicaSetNodeExecutor(ServiceContext* serviceContext) { tpOptions.maxThreads = ThreadPool::Options::kUnlimited; tpOptions.onCreateThread = [](const std::string& threadName) { Client::initThread(threadName.c_str()); + + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); }; auto hookList = std::make_unique<rpc::EgressMetadataHookList>(); hookList->addHook(std::make_unique<rpc::VectorClockMetadataHook>(serviceContext)); @@ -1175,6 +1184,9 @@ auto makeReplicationExecutor(ServiceContext* serviceContext) { tpOptions.maxThreads = 50; tpOptions.onCreateThread = [](const std::string& threadName) { Client::initThread(threadName.c_str()); + + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); }; auto hookList = std::make_unique<rpc::EgressMetadataHookList>(); hookList->addHook(std::make_unique<rpc::VectorClockMetadataHook>(serviceContext)); @@ -1322,9 +1334,13 @@ MONGO_INITIALIZER_GENERAL(setSSLManagerType, (), ("SSLManager")) void shutdownTask(const ShutdownTaskArgs& shutdownArgs) { // This client initiation pattern is only to be used here, with plans to eliminate this pattern // down the line. - if (!haveClient()) + if (!haveClient()) { Client::initThread(getThreadName()); + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + auto const client = Client::getCurrent(); auto const serviceContext = client->getServiceContext(); diff --git a/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp b/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp index 09467c988a5..3e3d10cad3e 100644 --- a/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp +++ b/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp @@ -115,7 +115,9 @@ void PeriodicThreadToAbortExpiredTransactions::_init(ServiceContext* serviceCont LOGV2_DEBUG(4684101, 2, "Periodic job canceled", "{reason}"_attr = ex.reason()); } }, - getPeriod(gTransactionLifetimeLimitSeconds.load())); + getPeriod(gTransactionLifetimeLimitSeconds.load()), + // TODO(SERVER-74656): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/); _anchor = std::make_shared<PeriodicJobAnchor>(periodicRunner->makeJob(std::move(job))); diff --git a/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp b/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp index 15d4b3a7d34..19ec2bb895b 100644 --- a/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp +++ b/src/mongo/db/pipeline/change_stream_expired_pre_image_remover.cpp @@ -90,10 +90,6 @@ public: ThreadClient tc(name(), getGlobalServiceContext()); AuthorizationSession::get(cc())->grantInternalAuthorization(&cc()); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc.get()->setSystemOperationKillableByStepdown(lk); - } while (true) { LOGV2_DEBUG(6278517, 3, "Thread awake"); diff --git a/src/mongo/db/pipeline/document_source_out.cpp b/src/mongo/db/pipeline/document_source_out.cpp index db53f060070..4c8103b2e37 100644 --- a/src/mongo/db/pipeline/document_source_out.cpp +++ b/src/mongo/db/pipeline/document_source_out.cpp @@ -71,6 +71,13 @@ DocumentSourceOut::~DocumentSourceOut() { if (_tempNs.size() || (_timeseries && !_timeseriesViewCreated)) { auto cleanupClient = pExpCtx->opCtx->getServiceContext()->makeClient("$out_replace_coll_cleanup"); + + // TODO(SERVER-74662): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*cleanupClient.get()); + cleanupClient.get()->setSystemOperationUnkillableByStepdown(lk); + } + AlternativeClientRegion acr(cleanupClient); // Create a new operation context so that any interrupts on the current operation will // not affect the dropCollection operation below. diff --git a/src/mongo/db/process_health/config_server_health_observer.cpp b/src/mongo/db/process_health/config_server_health_observer.cpp index bf011d28472..2044e68d674 100644 --- a/src/mongo/db/process_health/config_server_health_observer.cpp +++ b/src/mongo/db/process_health/config_server_health_observer.cpp @@ -173,6 +173,12 @@ Future<ConfigServerHealthObserver::CheckResult> ConfigServerHealthObserver::_che checkCtx->opCtx = checkCtx->client->makeOperationContext(); checkCtx->opCtx->setDeadlineAfterNowBy(kObserverTimeout, ErrorCodes::ExceededTimeLimit); + // TODO(SERVER-74659): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*checkCtx->client.get()); + checkCtx->client.get()->setSystemOperationUnkillableByStepdown(lk); + } + LOGV2_DEBUG(5939001, 3, "Checking Config server health"); _runSmokeReadShardsCommand(checkCtx); diff --git a/src/mongo/db/process_health/dns_health_observer.cpp b/src/mongo/db/process_health/dns_health_observer.cpp index b4a8bf0bf2b..146c7881737 100644 --- a/src/mongo/db/process_health/dns_health_observer.cpp +++ b/src/mongo/db/process_health/dns_health_observer.cpp @@ -71,6 +71,13 @@ Future<HealthCheckStatus> DnsHealthObserver::periodicCheckImpl( if (!isFailPointActive) { auto client = _svcCtx->makeClient("DNSHealthObserver"); + + // TODO(SERVER-74659): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*client.get()); + client.get()->setSystemOperationUnkillableByStepdown(lk); + } + auto opCtx = client->makeOperationContext(); auto const shardRegistry = Grid::get(_svcCtx)->shardRegistry(); auto shardIds = shardRegistry->getAllShardIds(opCtx.get()); diff --git a/src/mongo/db/process_health/progress_monitor.cpp b/src/mongo/db/process_health/progress_monitor.cpp index d940a06c498..fd16d76606c 100644 --- a/src/mongo/db/process_health/progress_monitor.cpp +++ b/src/mongo/db/process_health/progress_monitor.cpp @@ -141,6 +141,12 @@ void ProgressMonitor::_progressMonitorLoop() { Client::initThread("FaultManagerProgressMonitor"_sd, _svcCtx, nullptr); static const int kSleepsPerInterval = 10; + // TODO(SERVER-74659): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + while (!_terminate.load()) { progressMonitorCheck(_crashCb); diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp index daa0389c9fa..0cb2c057f8f 100644 --- a/src/mongo/db/repl/bgsync.cpp +++ b/src/mongo/db/repl/bgsync.cpp @@ -226,6 +226,11 @@ void BackgroundSync::_run() { Client::initThread("BackgroundSync"); AuthorizationSession::get(cc())->grantInternalAuthorization(&cc()); + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + while (!inShutdown()) { try { _runProducer(); diff --git a/src/mongo/db/repl/noop_writer.cpp b/src/mongo/db/repl/noop_writer.cpp index 4bf8188010b..81be6651282 100644 --- a/src/mongo/db/repl/noop_writer.cpp +++ b/src/mongo/db/repl/noop_writer.cpp @@ -83,6 +83,13 @@ public: private: void run(Seconds waitTime, NoopWriteFn noopWrite) { Client::initThread("NoopWriter"); + + // TODO(SERVER-74656): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + while (true) { const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext(); OperationContext& opCtx = *opCtxPtr; diff --git a/src/mongo/db/repl/oplog_applier.cpp b/src/mongo/db/repl/oplog_applier.cpp index af33eaf9633..3e8364baa34 100644 --- a/src/mongo/db/repl/oplog_applier.cpp +++ b/src/mongo/db/repl/oplog_applier.cpp @@ -171,9 +171,9 @@ std::unique_ptr<ThreadPool> makeReplWriterPool(int threadCount, auto client = Client::getCurrent(); AuthorizationSession::get(*client)->grantInternalAuthorization(client); - if (isKillableByStepdown) { + if (!isKillableByStepdown) { stdx::lock_guard<Client> lk(*client); - client->setSystemOperationKillableByStepdown(lk); + client->setSystemOperationUnkillableByStepdown(lk); } }; auto pool = std::make_unique<ThreadPool>(options); diff --git a/src/mongo/db/repl/oplog_applier_impl.cpp b/src/mongo/db/repl/oplog_applier_impl.cpp index baff24f4ba7..10239d41777 100644 --- a/src/mongo/db/repl/oplog_applier_impl.cpp +++ b/src/mongo/db/repl/oplog_applier_impl.cpp @@ -296,6 +296,11 @@ void ApplyBatchFinalizerForJournal::record(const OpTimeAndWallTime& newOpTimeAnd void ApplyBatchFinalizerForJournal::_run() { Client::initThread("ApplyBatchFinalizerForJournal"); + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + while (true) { OpTimeAndWallTime latestOpTimeAndWallTime = {OpTime(), Date_t()}; diff --git a/src/mongo/db/repl/oplog_batcher.cpp b/src/mongo/db/repl/oplog_batcher.cpp index 86415c4f1ad..a652d7346e3 100644 --- a/src/mongo/db/repl/oplog_batcher.cpp +++ b/src/mongo/db/repl/oplog_batcher.cpp @@ -298,6 +298,13 @@ void OplogBatcher::_consume(OperationContext* opCtx, OplogBuffer* oplogBuffer) { void OplogBatcher::_run(StorageInterface* storageInterface) { Client::initThread("ReplBatcher"); + { + // The OplogBatcher's thread has its own shutdown sequence triggered by the OplogApplier, + // so we don't want it to be killed in other ways. + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + BatchLimits batchLimits; while (true) { diff --git a/src/mongo/db/repl/primary_only_service.cpp b/src/mongo/db/repl/primary_only_service.cpp index ab4f061d84d..4bf6d12cc49 100644 --- a/src/mongo/db/repl/primary_only_service.cpp +++ b/src/mongo/db/repl/primary_only_service.cpp @@ -335,9 +335,6 @@ void PrimaryOnlyService::startup(OperationContext* opCtx) { auto client = Client::getCurrent(); AuthorizationSession::get(*client)->grantInternalAuthorization(&cc()); - stdx::lock_guard<Client> lk(*client); - client->setSystemOperationKillableByStepdown(lk); - // Associate this Client with this PrimaryOnlyService primaryOnlyServiceStateForClient(client).primaryOnlyService = this; }; diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp index 313ac3773f6..427c05f6dbd 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp @@ -146,6 +146,12 @@ auto makeThreadPool(const std::string& poolName, const std::string& threadName) threadPoolOptions.poolName = poolName; threadPoolOptions.onCreateThread = [](const std::string& threadName) { Client::initThread(threadName.c_str()); + + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + AuthorizationSession::get(cc())->grantInternalAuthorization(&cc()); }; return std::make_unique<ThreadPool>(threadPoolOptions); diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 1113eb9bc6c..60ba492573f 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -2651,6 +2651,11 @@ void ReplicationCoordinatorImpl::AutoGetRstlForStepUpStepDown::_killOpThreadFn() invariant(!cc().isFromUserConnection()); + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + LOGV2(21343, "Starting to kill user operations"); auto uniqueOpCtx = cc().makeOperationContext(); OperationContext* opCtx = uniqueOpCtx.get(); diff --git a/src/mongo/db/repl/replication_coordinator_test_fixture.cpp b/src/mongo/db/repl/replication_coordinator_test_fixture.cpp index 744e1cf824f..6b50af1e0b7 100644 --- a/src/mongo/db/repl/replication_coordinator_test_fixture.cpp +++ b/src/mongo/db/repl/replication_coordinator_test_fixture.cpp @@ -116,7 +116,10 @@ void ReplCoordTest::addSelf(const HostAndPort& selfHost) { void ReplCoordTest::init() { invariant(!_repl); invariant(!_callShutdown); - + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } auto service = getGlobalServiceContext(); _storageInterface = new StorageInterfaceMock(); StorageInterface::set(service, std::unique_ptr<StorageInterface>(_storageInterface)); @@ -160,6 +163,9 @@ void ReplCoordTest::init() { executor::ThreadPoolMock::Options tpOptions; tpOptions.onCreateThread = []() { Client::initThread("replexec"); + + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); }; auto pool = std::make_unique<executor::ThreadPoolMock>(_net, seed, tpOptions); auto replExec = diff --git a/src/mongo/db/repl/rollback_test_fixture.cpp b/src/mongo/db/repl/rollback_test_fixture.cpp index f88973b38b9..5eca6a6e47c 100644 --- a/src/mongo/db/repl/rollback_test_fixture.cpp +++ b/src/mongo/db/repl/rollback_test_fixture.cpp @@ -88,7 +88,10 @@ public: void RollbackTest::setUp() { ServiceContextMongoDTest::setUp(); - + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } _storageInterface = new StorageInterfaceRollback(); auto serviceContext = getServiceContext(); auto consistencyMarkers = std::make_unique<ReplicationConsistencyMarkersMock>(); diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp index 00173186d96..01e9a24cbea 100644 --- a/src/mongo/db/repl/storage_interface_impl.cpp +++ b/src/mongo/db/repl/storage_interface_impl.cpp @@ -224,6 +224,12 @@ StorageInterfaceImpl::createCollectionForBulkLoading( auto opCtx = cc().makeOperationContext(); opCtx->setEnforceConstraints(false); + // TODO(SERVER-74656): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + // DocumentValidationSettings::kDisableInternalValidation is currently inert. // But, it's logically ok to disable internal validation as this function gets called // only during initial sync. diff --git a/src/mongo/db/repl/sync_source_feedback.cpp b/src/mongo/db/repl/sync_source_feedback.cpp index 9b74f405eb0..37af1be31a2 100644 --- a/src/mongo/db/repl/sync_source_feedback.cpp +++ b/src/mongo/db/repl/sync_source_feedback.cpp @@ -159,6 +159,12 @@ void SyncSourceFeedback::run(executor::TaskExecutor* executor, ReplicationCoordinator* replCoord) { Client::initThread("SyncSourceFeedback"); + // TODO(SERVER-74656): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + HostAndPort syncTarget; // keepAliveInterval indicates how frequently to forward progress in the absence of updates. diff --git a/src/mongo/db/repl/tenant_file_importer_service.cpp b/src/mongo/db/repl/tenant_file_importer_service.cpp index 9764eb86820..2c6bca06031 100644 --- a/src/mongo/db/repl/tenant_file_importer_service.cpp +++ b/src/mongo/db/repl/tenant_file_importer_service.cpp @@ -134,6 +134,13 @@ void TenantFileImporterService::startMigration(const UUID& migrationId) { _workerThread = std::make_unique<stdx::thread>([this, migrationId] { Client::initThread("TenantFileImporterService"); + + // TODO(SERVER-74661): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + try { _handleEvents(migrationId); } catch (const DBException& err) { diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_registry.cpp b/src/mongo/db/repl/tenant_migration_access_blocker_registry.cpp index 0b020e66722..0c193a2ab11 100644 --- a/src/mongo/db/repl/tenant_migration_access_blocker_registry.cpp +++ b/src/mongo/db/repl/tenant_migration_access_blocker_registry.cpp @@ -72,6 +72,10 @@ TenantMigrationAccessBlockerRegistry::TenantMigrationAccessBlockerRegistry() { threadPoolOptions.poolName = "TenantMigrationBlockerAsyncThreadPool"; threadPoolOptions.onCreateThread = [](const std::string& threadName) { Client::initThread(threadName.c_str()); + + // TODO(SERVER-74661): Please revisit if this thread could be made killable. + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); }; _asyncBlockingOperationsExecutor = std::make_shared<executor::ThreadPoolTaskExecutor>( std::make_unique<ThreadPool>(threadPoolOptions), diff --git a/src/mongo/db/repl/tenant_migration_donor_service.cpp b/src/mongo/db/repl/tenant_migration_donor_service.cpp index 596ed32c807..c432b856d4e 100644 --- a/src/mongo/db/repl/tenant_migration_donor_service.cpp +++ b/src/mongo/db/repl/tenant_migration_donor_service.cpp @@ -382,17 +382,6 @@ TenantMigrationDonorService::Instance::_makeRecipientCmdExecutor() { Client::initThread(threadName.c_str()); auto client = Client::getCurrent(); AuthorizationSession::get(*client)->grantInternalAuthorization(&cc()); - - // Ideally, we should also associate the client created by _recipientCmdExecutor with the - // TenantMigrationDonorService to make the opCtxs created by the task executor get - // registered in the TenantMigrationDonorService, and killed on stepdown. But that would - // require passing the pointer to the TenantMigrationService into the Instance and making - // constructInstance not const so we can set the client's decoration here. Right now there - // is no need for that since the task executor is only used with scheduleRemoteCommand and - // no opCtx will be created (the cancellation token is responsible for canceling the - // outstanding work on the task executor). - stdx::lock_guard<Client> lk(*client); - client->setSystemOperationKillableByStepdown(lk); }; auto hookList = std::make_unique<rpc::EgressMetadataHookList>(); diff --git a/src/mongo/db/repl/topology_version_observer.cpp b/src/mongo/db/repl/topology_version_observer.cpp index 07eb6c5f327..916d401ed25 100644 --- a/src/mongo/db/repl/topology_version_observer.cpp +++ b/src/mongo/db/repl/topology_version_observer.cpp @@ -178,6 +178,12 @@ void TopologyVersionObserver::_workerThreadBody() noexcept try { invariant(_serviceContext); ThreadClient tc(kTopologyVersionObserverName, _serviceContext); + // TODO(SERVER-74656): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + auto getTopologyVersion = [&]() -> boost::optional<TopologyVersion> { // Only the observer thread updates `_cache`, thus there is no need to hold the lock before // accessing `_cache` here. diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp index c61f2cbde99..01ad3f0aa27 100644 --- a/src/mongo/db/repl/transaction_oplog_application.cpp +++ b/src/mongo/db/repl/transaction_oplog_application.cpp @@ -786,6 +786,13 @@ void reconstructPreparedTransactions(OperationContext* opCtx, repl::OplogApplica // transaction oplog entry. auto newClient = opCtx->getServiceContext()->makeClient("reconstruct-prepared-transactions"); + + // TODO(SERVER-74656): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*newClient.get()); + newClient.get()->setSystemOperationUnkillableByStepdown(lk); + } + AlternativeClientRegion acr(newClient); const auto newOpCtx = cc().makeOperationContext(); diff --git a/src/mongo/db/repl/wait_for_majority_service.cpp b/src/mongo/db/repl/wait_for_majority_service.cpp index 37d6407e0fe..a55bfcc701e 100644 --- a/src/mongo/db/repl/wait_for_majority_service.cpp +++ b/src/mongo/db/repl/wait_for_majority_service.cpp @@ -107,6 +107,16 @@ void WaitForMajorityServiceImplBase::startup(ServiceContext* ctx) { ClientStrand::make(ctx->makeClient(kWaitClientName + _getReadOrWrite())); _waitForMajorityCancellationClient = ClientStrand::make(ctx->makeClient(kCancelClientName + _getReadOrWrite())); + // TODO(SERVER-74656): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*_waitForMajorityClient->getClientPointer()); + _waitForMajorityClient->getClientPointer()->setSystemOperationUnkillableByStepdown(lk); + } + { + stdx::lock_guard<Client> lk(*_waitForMajorityCancellationClient->getClientPointer()); + _waitForMajorityCancellationClient->getClientPointer() + ->setSystemOperationUnkillableByStepdown(lk); + } _backgroundWorkComplete = _periodicallyWaitForMajority(); _pool->startup(); _state = State::kRunning; diff --git a/src/mongo/db/s/balancer/balancer.cpp b/src/mongo/db/s/balancer/balancer.cpp index 186d1dc66b6..8110b4a5da9 100644 --- a/src/mongo/db/s/balancer/balancer.cpp +++ b/src/mongo/db/s/balancer/balancer.cpp @@ -440,6 +440,13 @@ void Balancer::_consumeActionStreamLoop() { }); Client::initThread("BalancerSecondary"); + + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + auto opCtx = cc().makeOperationContext(); executor::ScopedTaskExecutor executor( Grid::get(opCtx.get())->getExecutorPool()->getFixedExecutor()); @@ -450,6 +457,13 @@ void Balancer::_consumeActionStreamLoop() { ActionsStreamPolicy* policy) { invariant(_outstandingStreamingOps.addAndFetch(-1) >= 0); ThreadClient tc("BalancerSecondaryThread::applyActionResponse", getGlobalServiceContext()); + + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + auto opCtx = tc->makeOperationContext(); policy->applyActionResult(opCtx.get(), action, response); }; @@ -626,6 +640,13 @@ void Balancer::_mainThread() { }); Client::initThread("Balancer"); + + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + auto opCtx = cc().makeOperationContext(); auto shardingContext = Grid::get(opCtx.get()); diff --git a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp index 82db31f0437..0db96506cdd 100644 --- a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp +++ b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp @@ -360,6 +360,13 @@ void BalancerCommandsSchedulerImpl::_workerThread() { }); Client::initThread("BalancerCommandsScheduler"); + + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + bool stopWorkerRequested = false; LOGV2(5847205, "Balancer scheduler thread started"); diff --git a/src/mongo/db/s/balancer_stats_registry.cpp b/src/mongo/db/s/balancer_stats_registry.cpp index 3ad4c8e0b98..cacb45c3896 100644 --- a/src/mongo/db/s/balancer_stats_registry.cpp +++ b/src/mongo/db/s/balancer_stats_registry.cpp @@ -89,6 +89,12 @@ void BalancerStatsRegistry::initializeAsync(OperationContext* opCtx) { ThreadClient tc("BalancerStatsRegistry::asynchronousInitialization", getGlobalServiceContext()); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + { stdx::lock_guard lk{_stateMutex}; if (const auto currentState = _state.load(); currentState != State::kPrimaryIdle) { diff --git a/src/mongo/db/s/collection_sharding_runtime.cpp b/src/mongo/db/s/collection_sharding_runtime.cpp index d4e95f25d03..f5f5b01d6d9 100644 --- a/src/mongo/db/s/collection_sharding_runtime.cpp +++ b/src/mongo/db/s/collection_sharding_runtime.cpp @@ -704,10 +704,6 @@ void CollectionShardingRuntime::_cleanupBeforeInstallingNewCollectionMetadata( ExecutorFuture<void>{Grid::get(opCtx)->getExecutorPool()->getFixedExecutor()} .then([svcCtx{opCtx->getServiceContext()}, oldUUID, oldShardVersion] { ThreadClient tc{"CleanUpShardedMetadata", svcCtx}; - { - stdx::lock_guard<Client> lk{*tc.get()}; - tc->setSystemOperationKillableByStepdown(lk); - } auto uniqueOpCtx{tc->makeOperationContext()}; auto opCtx{uniqueOpCtx.get()}; diff --git a/src/mongo/db/s/config/configsvr_remove_chunks_command.cpp b/src/mongo/db/s/config/configsvr_remove_chunks_command.cpp index 5f6709ae9e5..dff59191d53 100644 --- a/src/mongo/db/s/config/configsvr_remove_chunks_command.cpp +++ b/src/mongo/db/s/config/configsvr_remove_chunks_command.cpp @@ -81,11 +81,6 @@ public: // Use an ACR because we will perform a {multi: true} delete, which is otherwise not // supported on a session. auto newClient = opCtx->getServiceContext()->makeClient("RemoveChunksMetadata"); - { - stdx::lock_guard<Client> lk(*newClient.get()); - newClient->setSystemOperationKillableByStepdown(lk); - } - AlternativeClientRegion acr(newClient); auto executor = Grid::get(opCtx->getServiceContext())->getExecutorPool()->getFixedExecutor(); diff --git a/src/mongo/db/s/config/configsvr_remove_tags_command.cpp b/src/mongo/db/s/config/configsvr_remove_tags_command.cpp index 8dd75292aa7..db996fce388 100644 --- a/src/mongo/db/s/config/configsvr_remove_tags_command.cpp +++ b/src/mongo/db/s/config/configsvr_remove_tags_command.cpp @@ -79,11 +79,6 @@ public: { auto newClient = opCtx->getServiceContext()->makeClient("RemoveTagsMetadata"); - { - stdx::lock_guard<Client> lk(*newClient.get()); - newClient->setSystemOperationKillableByStepdown(lk); - } - AlternativeClientRegion acr(newClient); auto executor = Grid::get(opCtx->getServiceContext())->getExecutorPool()->getFixedExecutor(); diff --git a/src/mongo/db/s/config/configsvr_rename_collection_metadata_command.cpp b/src/mongo/db/s/config/configsvr_rename_collection_metadata_command.cpp index 6209d5992d0..2abbdd528b0 100644 --- a/src/mongo/db/s/config/configsvr_rename_collection_metadata_command.cpp +++ b/src/mongo/db/s/config/configsvr_rename_collection_metadata_command.cpp @@ -102,11 +102,6 @@ public: auto newClient = opCtx->getServiceContext()->makeClient("RenameCollectionMetadata"); AuthorizationSession::get(newClient.get()) ->grantInternalAuthorization(newClient.get()); - { - stdx::lock_guard<Client> lk(*newClient.get()); - newClient->setSystemOperationKillableByStepdown(lk); - } - AlternativeClientRegion acr(newClient); auto executor = Grid::get(opCtx->getServiceContext())->getExecutorPool()->getFixedExecutor(); diff --git a/src/mongo/db/s/config/placement_history_cleaner.cpp b/src/mongo/db/s/config/placement_history_cleaner.cpp index 7e1a5215de3..2d8fded89b4 100644 --- a/src/mongo/db/s/config/placement_history_cleaner.cpp +++ b/src/mongo/db/s/config/placement_history_cleaner.cpp @@ -139,7 +139,9 @@ void PlacementHistoryCleaner::onStepUpComplete(OperationContext* opCtx, long lon PeriodicRunner::PeriodicJob placementHistoryCleanerJob( "PlacementHistoryCleanUpJob", [](Client* client) { runOnce(client, kminPlacementHistoryEntries); }, - kJobExecutionPeriod); + kJobExecutionPeriod, + // TODO(SERVER-74658): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/); _anchor = periodicRunner->makeJob(std::move(placementHistoryCleanerJob)); _anchor.start(); diff --git a/src/mongo/db/s/config/sharding_catalog_manager.cpp b/src/mongo/db/s/config/sharding_catalog_manager.cpp index 0ace7321d3d..6482c797c0f 100644 --- a/src/mongo/db/s/config/sharding_catalog_manager.cpp +++ b/src/mongo/db/s/config/sharding_catalog_manager.cpp @@ -148,10 +148,6 @@ BSONObj commitOrAbortTransaction(OperationContext* opCtx, // that have been run on this opCtx would have set the timeout in the locker on the opCtx, but // commit should not have a lock timeout. auto newClient = getGlobalServiceContext()->makeClient("ShardingCatalogManager"); - { - stdx::lock_guard<Client> lk(*newClient); - newClient->setSystemOperationKillableByStepdown(lk); - } AlternativeClientRegion acr(newClient); auto newOpCtx = cc().makeOperationContext(); newOpCtx->setAlwaysInterruptAtStepDownOrUp_UNSAFE(); @@ -957,6 +953,11 @@ Status ShardingCatalogManager::_notifyClusterOnNewDatabases( // Setup an AlternativeClientRegion and a non-interruptible Operation Context to ensure that // the notification may be also sent out while the node is stepping down. auto altClient = opCtx->getServiceContext()->makeClient("_notifyClusterOnNewDatabases"); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + mongo::stdx::lock_guard<mongo::Client> lk(*altClient.get()); + altClient.get()->setSystemOperationUnkillableByStepdown(lk); + } AlternativeClientRegion acr(altClient); auto altOpCtxHolder = cc().makeOperationContext(); auto altOpCtx = altOpCtxHolder.get(); @@ -1152,10 +1153,6 @@ void ShardingCatalogManager::withTransaction( AlternativeSessionRegion asr(opCtx); auto* const client = asr.opCtx()->getClient(); - { - stdx::lock_guard<Client> lk(*client); - client->setSystemOperationKillableByStepdown(lk); - } asr.opCtx()->setAlwaysInterruptAtStepDownOrUp_UNSAFE(); AuthorizationSession::get(client)->grantInternalAuthorization(client); TxnNumber txnNumber = 0; @@ -1289,6 +1286,11 @@ void ShardingCatalogManager::initializePlacementHistory(OperationContext* opCtx) // internal client credentials). { auto altClient = opCtx->getServiceContext()->makeClient("initializePlacementHistory"); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*altClient.get()); + altClient.get()->setSystemOperationUnkillableByStepdown(lk); + } AuthorizationSession::get(altClient.get())->grantInternalAuthorization(altClient.get()); AlternativeClientRegion acr(altClient); auto executor = diff --git a/src/mongo/db/s/create_collection_coordinator.cpp b/src/mongo/db/s/create_collection_coordinator.cpp index 7f325f12742..78af6bd3fc4 100644 --- a/src/mongo/db/s/create_collection_coordinator.cpp +++ b/src/mongo/db/s/create_collection_coordinator.cpp @@ -320,10 +320,6 @@ void insertChunks(OperationContext* opCtx, { auto newClient = opCtx->getServiceContext()->makeClient("CreateCollectionCoordinator::insertChunks"); - { - stdx::lock_guard<Client> lk(*newClient.get()); - newClient->setSystemOperationKillableByStepdown(lk); - } AlternativeClientRegion acr(newClient); auto executor = diff --git a/src/mongo/db/s/drop_collection_coordinator.cpp b/src/mongo/db/s/drop_collection_coordinator.cpp index ba6992ca376..e8bc06a8a44 100644 --- a/src/mongo/db/s/drop_collection_coordinator.cpp +++ b/src/mongo/db/s/drop_collection_coordinator.cpp @@ -87,10 +87,6 @@ void DropCollectionCoordinator::dropCollectionLocally(OperationContext* opCtx, // an alternative client. auto newClient = opCtx->getServiceContext()->makeClient("removeRangeDeletions-" + collectionUUID->toString()); - { - stdx::lock_guard<Client> lk(*newClient.get()); - newClient->setSystemOperationKillableByStepdown(lk); - } AlternativeClientRegion acr{newClient}; auto executor = Grid::get(opCtx->getServiceContext())->getExecutorPool()->getFixedExecutor(); diff --git a/src/mongo/db/s/flush_resharding_state_change_command.cpp b/src/mongo/db/s/flush_resharding_state_change_command.cpp index b9494be2d30..c0e1523ed6e 100644 --- a/src/mongo/db/s/flush_resharding_state_change_command.cpp +++ b/src/mongo/db/s/flush_resharding_state_change_command.cpp @@ -114,11 +114,6 @@ public: ExecutorFuture<void>(Grid::get(opCtx)->getExecutorPool()->getArbitraryExecutor()) .then([svcCtx = opCtx->getServiceContext(), nss = ns()] { ThreadClient tc("FlushReshardingStateChange", svcCtx); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } - auto opCtx = tc->makeOperationContext(); onCollectionPlacementVersionMismatch( opCtx.get(), nss, boost::none /* chunkVersionReceived */); diff --git a/src/mongo/db/s/global_index/global_index_cloning_service.cpp b/src/mongo/db/s/global_index/global_index_cloning_service.cpp index 454d99e93c3..78d4ffc27db 100644 --- a/src/mongo/db/s/global_index/global_index_cloning_service.cpp +++ b/src/mongo/db/s/global_index/global_index_cloning_service.cpp @@ -212,8 +212,14 @@ void GlobalIndexCloningService::CloningStateMachine::_init( _metadata.getNss(), indexSpec.getName(), _metadata.getIndexCollectionUUID(), **executor); auto client = _serviceContext->makeClient("globalIndexClonerServiceInit"); - AlternativeClientRegion clientRegion(client); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*client.get()); + client.get()->setSystemOperationUnkillableByStepdown(lk); + } + + AlternativeClientRegion clientRegion(client); auto opCtx = _serviceContext->makeOperationContext(Client::getCurrent()); auto routingInfo = diff --git a/src/mongo/db/s/global_index/global_index_inserter.cpp b/src/mongo/db/s/global_index/global_index_inserter.cpp index 8ebadf1dd90..ec9d4883d63 100644 --- a/src/mongo/db/s/global_index/global_index_inserter.cpp +++ b/src/mongo/db/s/global_index/global_index_inserter.cpp @@ -77,12 +77,6 @@ void GlobalIndexInserter::processDoc(OperationContext* opCtx, const auto& skipIdDocResults) { auto client = service->makeClient("globalIndexInserter"); auto opCtx = service->makeOperationContext(client.get()); - - { - stdx::lock_guard<Client> lk(*client); - client->setSystemOperationKillableByStepdown(lk); - } - globalIndexInserterPauseAfterReadingSkipCollection.pauseWhileSet(opCtx.get()); if (!skipIdDocResults.empty()) { diff --git a/src/mongo/db/s/migration_batch_fetcher.h b/src/mongo/db/s/migration_batch_fetcher.h index 8d9bc13ad13..f3e732ff2f3 100644 --- a/src/mongo/db/s/migration_batch_fetcher.h +++ b/src/mongo/db/s/migration_batch_fetcher.h @@ -161,10 +161,6 @@ private: static void onCreateThread(const std::string& threadName) { Client::initThread(threadName, getGlobalServiceContext(), nullptr); - { - stdx::lock_guard<Client> lk(cc()); - cc().setSystemOperationKillableByStepdown(lk); - } } }; // namespace mongo diff --git a/src/mongo/db/s/migration_batch_inserter.cpp b/src/mongo/db/s/migration_batch_inserter.cpp index 37b2c0d07dd..66e38650136 100644 --- a/src/mongo/db/s/migration_batch_inserter.cpp +++ b/src/mongo/db/s/migration_batch_inserter.cpp @@ -87,10 +87,6 @@ void runWithoutSession(OperationContext* opCtx, Callable callable) { void MigrationBatchInserter::onCreateThread(const std::string& threadName) { Client::initThread(threadName, getGlobalServiceContext(), nullptr); - { - stdx::lock_guard<Client> lk(cc()); - cc().setSystemOperationKillableByStepdown(lk); - } } void MigrationBatchInserter::run(Status status) const try { diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp index 723de44c83b..47558dbb88a 100644 --- a/src/mongo/db/s/migration_destination_manager.cpp +++ b/src/mongo/db/s/migration_destination_manager.cpp @@ -596,11 +596,6 @@ repl::OpTime MigrationDestinationManager::fetchAndApplyBatch( stdx::thread applicationThread{[&] { Client::initThread("batchApplier", opCtx->getServiceContext(), nullptr); - auto client = Client::getCurrent(); - { - stdx::lock_guard lk(*client); - client->setSystemOperationKillableByStepdown(lk); - } auto executor = Grid::get(opCtx->getServiceContext())->getExecutorPool()->getFixedExecutor(); auto applicationOpCtx = CancelableOperationContext( @@ -1099,11 +1094,6 @@ void MigrationDestinationManager::_migrateThread(CancellationToken cancellationT Client::initThread("migrateThread"); auto client = Client::getCurrent(); - { - stdx::lock_guard lk(*client); - client->setSystemOperationKillableByStepdown(lk); - } - bool recovering = false; while (true) { const auto executor = @@ -1297,11 +1287,6 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx, outerOpCtx->setAlwaysInterruptAtStepDownOrUp_UNSAFE(); { auto newClient = outerOpCtx->getServiceContext()->makeClient("MigrationCoordinator"); - { - stdx::lock_guard<Client> lk(*newClient.get()); - newClient->setSystemOperationKillableByStepdown(lk); - } - AlternativeClientRegion acr(newClient); auto executor = Grid::get(outerOpCtx->getServiceContext())->getExecutorPool()->getFixedExecutor(); @@ -1362,10 +1347,6 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx, } auto newClient = outerOpCtx->getServiceContext()->makeClient("MigrationCoordinator"); - { - stdx::lock_guard<Client> lk(*newClient.get()); - newClient->setSystemOperationKillableByStepdown(lk); - } AlternativeClientRegion acr(newClient); auto executor = Grid::get(outerOpCtx->getServiceContext())->getExecutorPool()->getFixedExecutor(); @@ -1635,10 +1616,6 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx, } else { outerOpCtx->setAlwaysInterruptAtStepDownOrUp_UNSAFE(); auto newClient = outerOpCtx->getServiceContext()->makeClient("MigrationCoordinator"); - { - stdx::lock_guard<Client> lk(*newClient.get()); - newClient->setSystemOperationKillableByStepdown(lk); - } AlternativeClientRegion acr(newClient); auto executor = Grid::get(outerOpCtx->getServiceContext())->getExecutorPool()->getFixedExecutor(); @@ -1668,10 +1645,6 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx, outerOpCtx->setAlwaysInterruptAtStepDownOrUp_UNSAFE(); auto newClient = outerOpCtx->getServiceContext()->makeClient("MigrationCoordinator"); - { - stdx::lock_guard<Client> lk(*newClient.get()); - newClient->setSystemOperationKillableByStepdown(lk); - } AlternativeClientRegion acr(newClient); auto executor = Grid::get(outerOpCtx->getServiceContext())->getExecutorPool()->getFixedExecutor(); diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp index 066d898b26a..3b98d03285b 100644 --- a/src/mongo/db/s/migration_source_manager.cpp +++ b/src/mongo/db/s/migration_source_manager.cpp @@ -743,10 +743,6 @@ void MigrationSourceManager::_cleanup(bool completeMigration) noexcept { } auto newClient = _opCtx->getServiceContext()->makeClient("MigrationCoordinator"); - { - stdx::lock_guard<Client> lk(*newClient.get()); - newClient->setSystemOperationKillableByStepdown(lk); - } AlternativeClientRegion acr(newClient); auto newOpCtxPtr = cc().makeOperationContext(); auto newOpCtx = newOpCtxPtr.get(); diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp index f890664cf4a..db69fb0a243 100644 --- a/src/mongo/db/s/migration_util.cpp +++ b/src/mongo/db/s/migration_util.cpp @@ -220,12 +220,6 @@ void retryIdempotentWorkAsPrimaryUntilSuccessOrStepdown( try { auto newClient = opCtx->getServiceContext()->makeClient(newClientName); - - { - stdx::lock_guard<Client> lk(*newClient.get()); - newClient->setSystemOperationKillableByStepdown(lk); - } - auto newOpCtx = newClient->makeOperationContext(); AlternativeClientRegion altClient(newClient); @@ -450,10 +444,6 @@ ExecutorFuture<void> cleanUpRange(ServiceContext* serviceContext, const RangeDeletionTask& deletionTask) { return AsyncTry([=]() mutable { ThreadClient tc(kRangeDeletionThreadName, serviceContext); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } auto uniqueOpCtx = tc->makeOperationContext(); auto opCtx = uniqueOpCtx.get(); opCtx->setAlwaysInterruptAtStepDownOrUp_UNSAFE(); @@ -524,10 +514,6 @@ ExecutorFuture<void> submitRangeDeletionTask(OperationContext* opCtx, return ExecutorFuture<void>(executor) .then([=] { ThreadClient tc(kRangeDeletionThreadName, serviceContext); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } uassert( ErrorCodes::ResumableRangeDeleterDisabled, @@ -540,10 +526,6 @@ ExecutorFuture<void> submitRangeDeletionTask(OperationContext* opCtx, return cleanUpRange(serviceContext, executor, deletionTask) .onError<ErrorCodes::KeyPatternShorterThanBound>([=](Status status) { ThreadClient tc(kRangeDeletionThreadName, serviceContext); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } auto uniqueOpCtx = tc->makeOperationContext(); uniqueOpCtx->setAlwaysInterruptAtStepDownOrUp_UNSAFE(); @@ -565,10 +547,6 @@ ExecutorFuture<void> submitRangeDeletionTask(OperationContext* opCtx, }) .onError([=](const Status status) { ThreadClient tc(kRangeDeletionThreadName, serviceContext); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } auto uniqueOpCtx = tc->makeOperationContext(); auto opCtx = uniqueOpCtx.get(); @@ -608,10 +586,6 @@ void resubmitRangeDeletionsOnStepUp(ServiceContext* serviceContext) { ExecutorFuture<void>(getMigrationUtilExecutor(serviceContext)) .then([serviceContext] { ThreadClient tc("ResubmitRangeDeletions", serviceContext); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } auto opCtx = tc->makeOperationContext(); opCtx->setAlwaysInterruptAtStepDownOrUp_UNSAFE(); @@ -644,10 +618,6 @@ void resubmitRangeDeletionsOnStepUp(ServiceContext* serviceContext) { }) .then([serviceContext] { ThreadClient tc("ResubmitRangeDeletions", serviceContext); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } auto opCtx = tc->makeOperationContext(); opCtx->setAlwaysInterruptAtStepDownOrUp_UNSAFE(); @@ -1179,10 +1149,6 @@ ExecutorFuture<void> launchReleaseCriticalSectionOnRecipientFuture( return ExecutorFuture<void>(executor).then([=] { ThreadClient tc("releaseRecipientCritSec", serviceContext); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } auto uniqueOpCtx = tc->makeOperationContext(); auto opCtx = uniqueOpCtx.get(); @@ -1320,10 +1286,6 @@ void asyncRecoverMigrationUntilSuccessOrStepDown(OperationContext* opCtx, ExecutorFuture<void>{Grid::get(opCtx)->getExecutorPool()->getFixedExecutor()} .then([svcCtx{opCtx->getServiceContext()}, nss] { ThreadClient tc{"MigrationRecovery", svcCtx}; - { - stdx::lock_guard<Client> lk{*tc.get()}; - tc->setSystemOperationKillableByStepdown(lk); - } auto uniqueOpCtx{tc->makeOperationContext()}; auto opCtx{uniqueOpCtx.get()}; diff --git a/src/mongo/db/s/periodic_sharded_index_consistency_checker.cpp b/src/mongo/db/s/periodic_sharded_index_consistency_checker.cpp index 78431d197df..4379554fae3 100644 --- a/src/mongo/db/s/periodic_sharded_index_consistency_checker.cpp +++ b/src/mongo/db/s/periodic_sharded_index_consistency_checker.cpp @@ -195,7 +195,9 @@ void PeriodicShardedIndexConsistencyChecker::_launchShardedIndexConsistencyCheck "error"_attr = ex.toStatus()); } }, - Milliseconds(shardedIndexConsistencyCheckIntervalMS)); + Milliseconds(shardedIndexConsistencyCheckIntervalMS), + // TODO(SERVER-74658): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/); _shardedIndexConsistencyChecker = periodicRunner->makeJob(std::move(job)); _shardedIndexConsistencyChecker.start(); } diff --git a/src/mongo/db/s/persistent_task_queue_test.cpp b/src/mongo/db/s/persistent_task_queue_test.cpp index d9dd6e3562c..9bc33346ae9 100644 --- a/src/mongo/db/s/persistent_task_queue_test.cpp +++ b/src/mongo/db/s/persistent_task_queue_test.cpp @@ -288,11 +288,6 @@ TEST_F(PersistentTaskQueueTest, TestKilledOperationContextWhileWaitingOnCV) { auto result = stdx::async(stdx::launch::async, [this, &q, &barrier] { ThreadClient tc("TestKilledOperationContextWhileWaitingOnCV", getServiceContext()); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } - auto opCtx = tc->makeOperationContext(); barrier.countDownAndWait(); diff --git a/src/mongo/db/s/query_analysis_writer.cpp b/src/mongo/db/s/query_analysis_writer.cpp index ce48eef2f14..0289bec8053 100644 --- a/src/mongo/db/s/query_analysis_writer.cpp +++ b/src/mongo/db/s/query_analysis_writer.cpp @@ -290,7 +290,9 @@ void QueryAnalysisWriter::onStartup(OperationContext* opCtx) { auto opCtx = client->makeOperationContext(); _flushQueries(opCtx.get()); }, - Seconds(gQueryAnalysisWriterIntervalSecs)); + Seconds(gQueryAnalysisWriterIntervalSecs), + // TODO(SERVER-74662): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/); _periodicQueryWriter = periodicRunner->makeJob(std::move(queryWriterJob)); _periodicQueryWriter.start(); @@ -303,7 +305,9 @@ void QueryAnalysisWriter::onStartup(OperationContext* opCtx) { auto opCtx = client->makeOperationContext(); _flushDiffs(opCtx.get()); }, - Seconds(gQueryAnalysisWriterIntervalSecs)); + Seconds(gQueryAnalysisWriterIntervalSecs), + // TODO(SERVER-74662): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/); _periodicDiffWriter = periodicRunner->makeJob(std::move(diffWriterJob)); _periodicDiffWriter.start(); @@ -314,6 +318,10 @@ void QueryAnalysisWriter::onStartup(OperationContext* opCtx) { threadPoolOptions.poolName = "QueryAnalysisWriterThreadPool"; threadPoolOptions.onCreateThread = [](const std::string& threadName) { Client::initThread(threadName.c_str()); + + // TODO(SERVER-74662): Please revisit if this thread could be made killable. + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); }; _executor = std::make_shared<executor::ThreadPoolTaskExecutor>( std::make_unique<ThreadPool>(threadPoolOptions), diff --git a/src/mongo/db/s/range_deleter_service.cpp b/src/mongo/db/s/range_deleter_service.cpp index f392cd34717..94539b3127b 100644 --- a/src/mongo/db/s/range_deleter_service.cpp +++ b/src/mongo/db/s/range_deleter_service.cpp @@ -133,11 +133,6 @@ void RangeDeleterService::ReadyRangeDeletionsProcessor::_completedRangeDeletion( void RangeDeleterService::ReadyRangeDeletionsProcessor::_runRangeDeletions() { Client::initThread(kRangeDeletionThreadName); { - stdx::lock_guard<Client> lk(cc()); - cc().setSystemOperationKillableByStepdown(lk); - } - - { stdx::lock_guard<Latch> lock(_mutex); if (_state != kRunning) { return; diff --git a/src/mongo/db/s/range_deletion_util.h b/src/mongo/db/s/range_deletion_util.h index 1042137a184..870284248b6 100644 --- a/src/mongo/db/s/range_deletion_util.h +++ b/src/mongo/db/s/range_deletion_util.h @@ -130,10 +130,6 @@ auto withTemporaryOperationContext(Callable&& callable, const UUID& collectionUUID, bool writeToRangeDeletionNamespace = false) { ThreadClient tc(kRangeDeletionThreadName, getGlobalServiceContext()); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } auto uniqueOpCtx = Client::getCurrent()->makeOperationContext(); auto opCtx = uniqueOpCtx.get(); diff --git a/src/mongo/db/s/resharding/resharding_collection_cloner.cpp b/src/mongo/db/s/resharding/resharding_collection_cloner.cpp index 4fc4e42fbbc..519aea48bf8 100644 --- a/src/mongo/db/s/resharding/resharding_collection_cloner.cpp +++ b/src/mongo/db/s/resharding/resharding_collection_cloner.cpp @@ -362,6 +362,12 @@ SemiFuture<void> ReshardingCollectionCloner::run( auto client = cc().getServiceContext()->makeClient("ReshardingCollectionClonerCleanupClient"); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*client.get()); + client.get()->setSystemOperationUnkillableByStepdown(lk); + } + AlternativeClientRegion acr(client); auto opCtx = cc().makeOperationContext(); diff --git a/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp b/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp index c75396abcdf..c525f2ec38a 100644 --- a/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp +++ b/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp @@ -130,11 +130,6 @@ public: executor::ThreadPoolMock::Options threadPoolOptions; threadPoolOptions.onCreateThread = [] { Client::initThread("TestReshardingDonorOplogIterator"); - auto& client = cc(); - { - stdx::lock_guard<Client> lk(client); - client.setSystemOperationKillableByStepdown(lk); - } }; auto executor = executor::makeThreadPoolTestExecutor( @@ -175,8 +170,6 @@ public: ServiceContext::UniqueClient makeKillableClient() { auto client = getServiceContext()->makeClient("ReshardingDonorOplogIterator"); - stdx::lock_guard<Client> lk(*client); - client->setSystemOperationKillableByStepdown(lk); return client; } diff --git a/src/mongo/db/s/resharding/resharding_donor_recipient_common.cpp b/src/mongo/db/s/resharding/resharding_donor_recipient_common.cpp index 116d975da7c..ff981eb549f 100644 --- a/src/mongo/db/s/resharding/resharding_donor_recipient_common.cpp +++ b/src/mongo/db/s/resharding/resharding_donor_recipient_common.cpp @@ -380,11 +380,6 @@ void clearFilteringMetadata(OperationContext* opCtx, AsyncTry([svcCtx = opCtx->getServiceContext(), nss] { ThreadClient tc("TriggerReshardingRecovery", svcCtx); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } - auto opCtx = tc->makeOperationContext(); onCollectionPlacementVersionMismatch( opCtx.get(), nss, boost::none /* chunkVersionReceived */); diff --git a/src/mongo/db/s/resharding/resharding_oplog_application.cpp b/src/mongo/db/s/resharding/resharding_oplog_application.cpp index bce5a6f8b1f..c25c379139a 100644 --- a/src/mongo/db/s/resharding/resharding_oplog_application.cpp +++ b/src/mongo/db/s/resharding/resharding_oplog_application.cpp @@ -68,10 +68,6 @@ void runWithTransaction(OperationContext* opCtx, unique_function<void(OperationContext*)> func) { AlternativeSessionRegion asr(opCtx); auto* const client = asr.opCtx()->getClient(); - { - stdx::lock_guard<Client> lk(*client); - client->setSystemOperationKillableByStepdown(lk); - } asr.opCtx()->setAlwaysInterruptAtStepDownOrUp_UNSAFE(); AuthorizationSession::get(client)->grantInternalAuthorization(client); diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier.cpp b/src/mongo/db/s/resharding/resharding_oplog_applier.cpp index b81e262d0a3..51632cb40cc 100644 --- a/src/mongo/db/s/resharding/resharding_oplog_applier.cpp +++ b/src/mongo/db/s/resharding/resharding_oplog_applier.cpp @@ -197,6 +197,12 @@ SemiFuture<void> ReshardingOplogApplier::run( auto client = cc().getServiceContext()->makeClient("ReshardingOplogApplierCleanupClient"); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*client.get()); + client.get()->setSystemOperationUnkillableByStepdown(lk); + } + AlternativeClientRegion acr(client); auto opCtx = cc().makeOperationContext(); diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier_test.cpp b/src/mongo/db/s/resharding/resharding_oplog_applier_test.cpp index 330084774b3..1fc013463e1 100644 --- a/src/mongo/db/s/resharding/resharding_oplog_applier_test.cpp +++ b/src/mongo/db/s/resharding/resharding_oplog_applier_test.cpp @@ -375,11 +375,6 @@ protected: Client::initThread(threadName.c_str()); auto* client = Client::getCurrent(); AuthorizationSession::get(*client)->grantInternalAuthorization(client); - - { - stdx::lock_guard<Client> lk(*client); - client->setSystemOperationKillableByStepdown(lk); - } }; auto hookList = std::make_unique<rpc::EgressMetadataHookList>(); diff --git a/src/mongo/db/s/resharding/resharding_oplog_batch_applier_test.cpp b/src/mongo/db/s/resharding/resharding_oplog_batch_applier_test.cpp index 35d9161f7e8..487e8717038 100644 --- a/src/mongo/db/s/resharding/resharding_oplog_batch_applier_test.cpp +++ b/src/mongo/db/s/resharding/resharding_oplog_batch_applier_test.cpp @@ -156,11 +156,6 @@ public: Client::initThread(threadName.c_str()); auto* client = Client::getCurrent(); AuthorizationSession::get(*client)->grantInternalAuthorization(client); - - { - stdx::lock_guard<Client> lk(*client); - client->setSystemOperationKillableByStepdown(lk); - } }; auto hookList = std::make_unique<rpc::EgressMetadataHookList>(); diff --git a/src/mongo/db/s/resharding/resharding_oplog_fetcher.cpp b/src/mongo/db/s/resharding/resharding_oplog_fetcher.cpp index 97a9788ef62..b1e91410aab 100644 --- a/src/mongo/db/s/resharding/resharding_oplog_fetcher.cpp +++ b/src/mongo/db/s/resharding/resharding_oplog_fetcher.cpp @@ -164,6 +164,13 @@ ExecutorFuture<void> ReshardingOplogFetcher::_reschedule( _reshardingUUID.toString(), _donorShard.toString()), _service()); + + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*client.get()); + client.get()->setSystemOperationUnkillableByStepdown(lk); + } + return iterate(client.get(), factory); }) .then([executor, cancelToken](bool moreToCome) { @@ -328,6 +335,12 @@ bool ReshardingOplogFetcher::consume(Client* client, auto opCtxRaii = factory.makeOperationContext(client.get()); auto opCtx = opCtxRaii.get(); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*client.get()); + client.get()->setSystemOperationUnkillableByStepdown(lk); + } + // Noting some possible optimizations: // // * Batch more inserts into larger storage transactions. diff --git a/src/mongo/db/s/resharding/resharding_txn_cloner.cpp b/src/mongo/db/s/resharding/resharding_txn_cloner.cpp index 8a015e52af5..500c4a2f990 100644 --- a/src/mongo/db/s/resharding/resharding_txn_cloner.cpp +++ b/src/mongo/db/s/resharding/resharding_txn_cloner.cpp @@ -307,6 +307,12 @@ SemiFuture<void> ReshardingTxnCloner::run( auto client = cc().getServiceContext()->makeClient("ReshardingTxnClonerCleanupClient"); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*client.get()); + client.get()->setSystemOperationUnkillableByStepdown(lk); + } + AlternativeClientRegion acr(client); auto opCtx = cc().makeOperationContext(); diff --git a/src/mongo/db/s/resharding/resharding_txn_cloner_test.cpp b/src/mongo/db/s/resharding/resharding_txn_cloner_test.cpp index aae776a4029..418657ec6b3 100644 --- a/src/mongo/db/s/resharding/resharding_txn_cloner_test.cpp +++ b/src/mongo/db/s/resharding/resharding_txn_cloner_test.cpp @@ -341,11 +341,6 @@ protected: Client::initThread(threadName.c_str()); auto* client = Client::getCurrent(); AuthorizationSession::get(*client)->grantInternalAuthorization(client); - - { - stdx::lock_guard<Client> lk(*client); - client->setSystemOperationKillableByStepdown(lk); - } }; auto hookList = std::make_unique<rpc::EgressMetadataHookList>(); diff --git a/src/mongo/db/s/resharding_test_commands.cpp b/src/mongo/db/s/resharding_test_commands.cpp index 74688928784..df291db2f00 100644 --- a/src/mongo/db/s/resharding_test_commands.cpp +++ b/src/mongo/db/s/resharding_test_commands.cpp @@ -72,11 +72,6 @@ public: Client::initThread(threadName.c_str()); auto* client = Client::getCurrent(); AuthorizationSession::get(*client)->grantInternalAuthorization(client); - - { - stdx::lock_guard<Client> lk(*client); - client->setSystemOperationKillableByStepdown(lk); - } }; auto metrics = ReshardingMetrics::makeInstance( diff --git a/src/mongo/db/s/session_catalog_migration_destination.cpp b/src/mongo/db/s/session_catalog_migration_destination.cpp index ae9eec23cc2..9ea5e089ef4 100644 --- a/src/mongo/db/s/session_catalog_migration_destination.cpp +++ b/src/mongo/db/s/session_catalog_migration_destination.cpp @@ -262,12 +262,6 @@ void SessionCatalogMigrationDestination::join() { void SessionCatalogMigrationDestination::_retrieveSessionStateFromSource(ServiceContext* service) { Client::initThread( "sessionCatalogMigrationProducer-" + _migrationSessionId.toString(), service, nullptr); - auto client = Client::getCurrent(); - { - stdx::lock_guard lk(*client); - client->setSystemOperationKillableByStepdown(lk); - } - bool oplogDrainedAfterCommiting = false; ProcessOplogResult lastResult; repl::OpTime lastOpTimeWaited; diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp index ecc4832caa3..78e4ceb8f98 100644 --- a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp +++ b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp @@ -178,11 +178,6 @@ SharedSemiFuture<void> recoverRefreshDbVersion(OperationContext* opCtx, serviceCtx = opCtx->getServiceContext(), forwardableOpMetadata = ForwardableOperationMetadata(opCtx)] { ThreadClient tc("DbMetadataRefreshThread", serviceCtx); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } - const auto opCtxHolder = CancelableOperationContext(tc->makeOperationContext(), cancellationToken, executor); auto opCtx = opCtxHolder.get(); @@ -367,10 +362,6 @@ SharedSemiFuture<void> recoverRefreshCollectionPlacementVersion( return ExecutorFuture<void>(executor) .then([=] { ThreadClient tc("RecoverRefreshThread", serviceContext); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } if (MONGO_unlikely(hangInRecoverRefreshThread.shouldFail())) { hangInRecoverRefreshThread.pauseWhileSet(); diff --git a/src/mongo/db/s/shard_server_catalog_cache_loader.cpp b/src/mongo/db/s/shard_server_catalog_cache_loader.cpp index 30eed2fbf81..fd654f84dee 100644 --- a/src/mongo/db/s/shard_server_catalog_cache_loader.cpp +++ b/src/mongo/db/s/shard_server_catalog_cache_loader.cpp @@ -457,6 +457,13 @@ SemiFuture<CollectionAndChangedChunks> ShardServerCatalogCacheLoader::getChunksS ThreadClient tc("ShardServerCatalogCacheLoader::getChunksSince", getGlobalServiceContext()); auto context = _contexts.makeOperationContext(*tc); + + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + { // We may have missed an OperationContextGroup interrupt since this operation // began but before the OperationContext was added to the group. So we'll check @@ -498,6 +505,12 @@ SemiFuture<DatabaseType> ShardServerCatalogCacheLoader::getDatabase(StringData d getGlobalServiceContext()); auto context = _contexts.makeOperationContext(*tc); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + { // We may have missed an OperationContextGroup interrupt since this operation began // but before the OperationContext was added to the group. So we'll check that we're @@ -1072,6 +1085,12 @@ void ShardServerCatalogCacheLoader::_runCollAndChunksTasks(const NamespaceString getGlobalServiceContext()); auto context = _contexts.makeOperationContext(*tc); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + bool taskFinished = false; bool inShutdown = false; try { @@ -1151,6 +1170,12 @@ void ShardServerCatalogCacheLoader::_runDbTasks(StringData dbName) { ThreadClient tc("ShardServerCatalogCacheLoader::runDbTasks", getGlobalServiceContext()); auto context = _contexts.makeOperationContext(*tc); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + bool taskFinished = false; bool inShutdown = false; try { diff --git a/src/mongo/db/s/sharding_ddl_util.cpp b/src/mongo/db/s/sharding_ddl_util.cpp index 600b57ecf64..2bfe11c2055 100644 --- a/src/mongo/db/s/sharding_ddl_util.cpp +++ b/src/mongo/db/s/sharding_ddl_util.cpp @@ -897,12 +897,6 @@ void runTransactionOnShardingCatalog(OperationContext* opCtx, // passed Operation context. We opt for creating a new one to avoid any possible side // effects. auto newClient = opCtx->getServiceContext()->makeClient("ShardingCatalogTransaction"); - - { - stdx::lock_guard<Client> lk(*newClient.get()); - newClient.get()->setSystemOperationKillableByStepdown(lk); - } - AuthorizationSession::get(newClient.get())->grantInternalAuthorization(newClient.get()); AlternativeClientRegion acr(newClient); diff --git a/src/mongo/db/s/sharding_initialization_mongod.cpp b/src/mongo/db/s/sharding_initialization_mongod.cpp index 5b3ff46f4c8..2573113c24b 100644 --- a/src/mongo/db/s/sharding_initialization_mongod.cpp +++ b/src/mongo/db/s/sharding_initialization_mongod.cpp @@ -230,6 +230,13 @@ private: ThreadClient tc("updateShardIdentityConfigString", _serviceContext); + + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + auto opCtx = tc->makeOperationContext(); ShardingInitializationMongoD::updateShardIdentityConfigString(opCtx.get(), update); } catch (const ExceptionForCat<ErrorCategory::ShutdownError>& e) { diff --git a/src/mongo/db/s/shardsvr_move_primary_enter_critical_section_command.cpp b/src/mongo/db/s/shardsvr_move_primary_enter_critical_section_command.cpp index 0abdf48d19e..cf84fd67a6c 100644 --- a/src/mongo/db/s/shardsvr_move_primary_enter_critical_section_command.cpp +++ b/src/mongo/db/s/shardsvr_move_primary_enter_critical_section_command.cpp @@ -69,10 +69,6 @@ public: // cause the failure of the second operation. auto newClient = getGlobalServiceContext()->makeClient( "ShardsvrMovePrimaryEnterCriticalSection"); - { - stdx::lock_guard<Client> lk(*newClient); - newClient->setSystemOperationKillableByStepdown(lk); - } AlternativeClientRegion acr(newClient); auto newOpCtx = CancelableOperationContext( cc().makeOperationContext(), diff --git a/src/mongo/db/s/shardsvr_move_primary_exit_critical_section_command.cpp b/src/mongo/db/s/shardsvr_move_primary_exit_critical_section_command.cpp index 81a0f4bd8aa..4a1bf387c6a 100644 --- a/src/mongo/db/s/shardsvr_move_primary_exit_critical_section_command.cpp +++ b/src/mongo/db/s/shardsvr_move_primary_exit_critical_section_command.cpp @@ -69,10 +69,6 @@ public: // solution is to use an alternative client as well as a new operation context. auto newClient = getGlobalServiceContext()->makeClient("ShardsvrMovePrimaryExitCriticalSection"); - { - stdx::lock_guard<Client> lk(*newClient); - newClient->setSystemOperationKillableByStepdown(lk); - } AlternativeClientRegion acr(newClient); auto newOpCtx = CancelableOperationContext( cc().makeOperationContext(), diff --git a/src/mongo/db/s/shardsvr_move_range_command.cpp b/src/mongo/db/s/shardsvr_move_range_command.cpp index 9fdacc8186c..d7840b3b8b1 100644 --- a/src/mongo/db/s/shardsvr_move_range_command.cpp +++ b/src/mongo/db/s/shardsvr_move_range_command.cpp @@ -109,10 +109,6 @@ public: // executor thread after setting the shared state as ready. auto scopedMigrationLocal(std::move(scopedMigration)); ThreadClient tc("MoveChunk", serviceContext); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } auto uniqueOpCtx = Client::getCurrent()->makeOperationContext(); auto executorOpCtx = uniqueOpCtx.get(); Status status = {ErrorCodes::InternalError, "Uninitialized value"}; diff --git a/src/mongo/db/s/shardsvr_participant_block_command.cpp b/src/mongo/db/s/shardsvr_participant_block_command.cpp index ad7f003df9b..88aff1d7444 100644 --- a/src/mongo/db/s/shardsvr_participant_block_command.cpp +++ b/src/mongo/db/s/shardsvr_participant_block_command.cpp @@ -128,10 +128,6 @@ public: if (txnParticipant) { auto newClient = getGlobalServiceContext()->makeClient("ShardSvrParticipantBlockCmdClient"); - { - stdx::lock_guard<Client> lk(*newClient); - newClient->setSystemOperationKillableByStepdown(lk); - } AlternativeClientRegion acr(newClient); auto cancelableOperationContext = CancelableOperationContext( cc().makeOperationContext(), diff --git a/src/mongo/db/s/transaction_coordinator.cpp b/src/mongo/db/s/transaction_coordinator.cpp index 39892137e9a..bbd3ea5c61e 100644 --- a/src/mongo/db/s/transaction_coordinator.cpp +++ b/src/mongo/db/s/transaction_coordinator.cpp @@ -84,6 +84,13 @@ ExecutorFuture<void> waitForMajorityWithHangFailpoint( failpoint.pauseWhileSet(); } else { ThreadClient tc(failPointName, service); + + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + auto opCtx = tc->makeOperationContext(); failpoint.pauseWhileSet(opCtx.get()); } diff --git a/src/mongo/db/s/transaction_coordinator_futures_util.h b/src/mongo/db/s/transaction_coordinator_futures_util.h index 1dd0554ee63..8ac1ab15aa5 100644 --- a/src/mongo/db/s/transaction_coordinator_futures_util.h +++ b/src/mongo/db/s/transaction_coordinator_futures_util.h @@ -101,6 +101,12 @@ public: ThreadClient tc("TransactionCoordinator", _serviceContext); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + auto uniqueOpCtxIter = [&] { stdx::lock_guard lk(_mutex); return _activeOpContexts.emplace(_activeOpContexts.begin(), diff --git a/src/mongo/db/session/kill_sessions_local.cpp b/src/mongo/db/session/kill_sessions_local.cpp index b40367cec27..71a00432876 100644 --- a/src/mongo/db/session/kill_sessions_local.cpp +++ b/src/mongo/db/session/kill_sessions_local.cpp @@ -192,6 +192,13 @@ void killSessionsAbortAllPreparedTransactions(OperationContext* opCtx) { void yieldLocksForPreparedTransactions(OperationContext* opCtx) { // Create a new opCtx because we need an empty locker to refresh the locks. auto newClient = opCtx->getServiceContext()->makeClient("prepared-txns-yield-locks"); + + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*newClient.get()); + newClient.get()->setSystemOperationUnkillableByStepdown(lk); + } + AlternativeClientRegion acr(newClient); auto newOpCtx = cc().makeOperationContext(); diff --git a/src/mongo/db/session/logical_session_cache_impl.cpp b/src/mongo/db/session/logical_session_cache_impl.cpp index 7bc51fd1137..b9b540222e6 100644 --- a/src/mongo/db/session/logical_session_cache_impl.cpp +++ b/src/mongo/db/session/logical_session_cache_impl.cpp @@ -70,13 +70,19 @@ LogicalSessionCacheImpl::LogicalSessionCacheImpl(std::unique_ptr<ServiceLiaison> _stats.setLastTransactionReaperJobTimestamp(_service->now()); if (!disableLogicalSessionCacheRefresh) { - _service->scheduleJob({"LogicalSessionCacheRefresh", - [this](Client* client) { _periodicRefresh(client); }, - Milliseconds(logicalSessionRefreshMillis)}); - - _service->scheduleJob({"LogicalSessionCacheReap", - [this](Client* client) { _periodicReap(client); }, - Milliseconds(logicalSessionRefreshMillis)}); + _service->scheduleJob( + {"LogicalSessionCacheRefresh", + [this](Client* client) { _periodicRefresh(client); }, + Milliseconds(logicalSessionRefreshMillis), + // TODO(SERVER-74659): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/}); + + _service->scheduleJob( + {"LogicalSessionCacheReap", + [this](Client* client) { _periodicReap(client); }, + Milliseconds(logicalSessionRefreshMillis), + // TODO(SERVER-74659): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/}); } } diff --git a/src/mongo/db/session/session_catalog_mongod.cpp b/src/mongo/db/session/session_catalog_mongod.cpp index 5acb0ce4d2f..633d0dca266 100644 --- a/src/mongo/db/session/session_catalog_mongod.cpp +++ b/src/mongo/db/session/session_catalog_mongod.cpp @@ -110,6 +110,13 @@ void killSessionTokens(OperationContext* opCtx, invariant(status); ThreadClient tc("Kill-Sessions", service); + + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + auto uniqueOpCtx = tc->makeOperationContext(); const auto opCtx = uniqueOpCtx.get(); const auto catalog = SessionCatalog::get(opCtx); @@ -514,6 +521,12 @@ void MongoDSessionCatalog::onStepUp(OperationContext* opCtx) { { // Create a new opCtx because we need an empty locker to refresh the locks. auto newClient = opCtx->getServiceContext()->makeClient("restore-prepared-txn"); + + { + stdx::lock_guard<Client> lk(*newClient.get()); + newClient.get()->setSystemOperationUnkillableByStepdown(lk); + } + AlternativeClientRegion acr(newClient); for (const auto& sessionInfo : sessionsToReacquireLocks) { auto newOpCtx = cc().makeOperationContext(); diff --git a/src/mongo/db/session/session_killer.cpp b/src/mongo/db/session/session_killer.cpp index fa3c6adda25..03882219ddb 100644 --- a/src/mongo/db/session/session_killer.cpp +++ b/src/mongo/db/session/session_killer.cpp @@ -51,6 +51,12 @@ SessionKiller::SessionKiller(ServiceContext* sc, KillFunc killer) ThreadClient tc("SessionKiller", sc); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + stdx::unique_lock<Latch> lk(_mutex); // While we're not in shutdown diff --git a/src/mongo/db/storage/checkpointer.cpp b/src/mongo/db/storage/checkpointer.cpp index 2595526973e..6b633075ae7 100644 --- a/src/mongo/db/storage/checkpointer.cpp +++ b/src/mongo/db/storage/checkpointer.cpp @@ -73,6 +73,11 @@ void Checkpointer::run() { ThreadClient tc(name(), getGlobalServiceContext()); LOGV2_DEBUG(22307, 1, "Starting thread", "threadName"_attr = name()); + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + while (true) { auto opCtx = tc->makeOperationContext(); diff --git a/src/mongo/db/storage/control/journal_flusher.cpp b/src/mongo/db/storage/control/journal_flusher.cpp index ebfc1c2718a..0ed04a9a747 100644 --- a/src/mongo/db/storage/control/journal_flusher.cpp +++ b/src/mongo/db/storage/control/journal_flusher.cpp @@ -80,6 +80,12 @@ void JournalFlusher::run() { ThreadClient tc(name(), getGlobalServiceContext()); LOGV2_DEBUG(4584701, 1, "starting {name} thread", "name"_attr = name()); + // TODO(SERVER-74657): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + // The thread must not run and access the service context to create an opCtx while unit test // infrastructure is still being set up and expects sole access to the service context (there is // no conurrency control on the service context during this phase). diff --git a/src/mongo/db/storage/disk_space_monitor.cpp b/src/mongo/db/storage/disk_space_monitor.cpp index 0d55c1256da..eb575d03327 100644 --- a/src/mongo/db/storage/disk_space_monitor.cpp +++ b/src/mongo/db/storage/disk_space_monitor.cpp @@ -72,7 +72,11 @@ void DiskSpaceMonitor::_start(ServiceContext* svcCtx) { LOGV2(7333401, "Starting the DiskSpaceMonitor"); invariant(!_job, "DiskSpaceMonitor is already started"); _job = svcCtx->getPeriodicRunner()->makeJob(PeriodicRunner::PeriodicJob{ - "DiskSpaceMonitor", [this](Client* client) { _run(client); }, Seconds(1)}); + "DiskSpaceMonitor", + [this](Client* client) { _run(client); }, + Seconds(1), + // TODO(SERVER-74657): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/}); _job.start(); } diff --git a/src/mongo/db/storage/flow_control.cpp b/src/mongo/db/storage/flow_control.cpp index 1866f16af6c..8f8a0b6cc6b 100644 --- a/src/mongo/db/storage/flow_control.cpp +++ b/src/mongo/db/storage/flow_control.cpp @@ -150,7 +150,9 @@ FlowControl::FlowControl(ServiceContext* service, repl::ReplicationCoordinator* [this](Client* client) { FlowControlTicketholder::get(client->getServiceContext())->refreshTo(getNumTickets()); }, - Seconds(1)}); + Seconds(1), + // TODO(SERVER-74657): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/}); _jobAnchor.start(); } diff --git a/src/mongo/db/storage/oplog_cap_maintainer_thread.cpp b/src/mongo/db/storage/oplog_cap_maintainer_thread.cpp index 2388800265d..4a9b5a23b35 100644 --- a/src/mongo/db/storage/oplog_cap_maintainer_thread.cpp +++ b/src/mongo/db/storage/oplog_cap_maintainer_thread.cpp @@ -113,6 +113,11 @@ void OplogCapMaintainerThread::run() { LOGV2_DEBUG(5295000, 1, "Oplog cap maintainer thread started", "threadName"_attr = _name); ThreadClient tc(_name, getGlobalServiceContext()); + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + while (!globalInShutdownDeprecated()) { if (MONGO_unlikely(hangOplogCapMaintainerThread.shouldFail())) { LOGV2(5095500, "Hanging the oplog cap maintainer thread due to fail point"); diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp index 490990dcbbc..3de218ecf49 100644 --- a/src/mongo/db/storage/storage_engine_impl.cpp +++ b/src/mongo/db/storage/storage_engine_impl.cpp @@ -1351,7 +1351,9 @@ void StorageEngineImpl::TimestampMonitor::_startup() { throw; } }, - Seconds(1)); + Seconds(1), + // TODO(SERVER-74657): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/); _job = _periodicRunner->makeJob(std::move(job)); _job.start(); diff --git a/src/mongo/db/storage/ticketholder_monitor.cpp b/src/mongo/db/storage/ticketholder_monitor.cpp index d77c6592ff7..46ed28310a4 100644 --- a/src/mongo/db/storage/ticketholder_monitor.cpp +++ b/src/mongo/db/storage/ticketholder_monitor.cpp @@ -38,7 +38,11 @@ TicketHolderMonitor::TicketHolderMonitor(ServiceContext* svcCtx, : _readTicketHolder(readTicketHolder), _writeTicketHolder(writeTicketHolder), _job(svcCtx->getPeriodicRunner()->makeJob(PeriodicRunner::PeriodicJob{ - "TicketHolderMonitor", [this](Client* client) { _run(client); }, interval})) {} + "TicketHolderMonitor", + [this](Client* client) { _run(client); }, + interval, + // TODO(SERVER-74657): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/})) {} void TicketHolderMonitor::start() { _job.start(); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index 9ceb937a61c..2fefa77da8a 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -251,6 +251,13 @@ public: virtual void run() { ThreadClient tc(name(), getGlobalServiceContext()); + + // TODO(SERVER-74657): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + LOGV2_DEBUG(22303, 1, "starting {name} thread", "name"_attr = name()); while (!_shuttingDown.load()) { diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp index 320246d2f45..068e6d15098 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp @@ -194,6 +194,12 @@ void WiredTigerOplogManager::_updateOplogVisibilityLoop(WiredTigerSessionCache* WiredTigerRecordStore* oplogRecordStore) { Client::initThread("OplogVisibilityThread"); + // TODO(SERVER-74657): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + // This thread updates the oplog read timestamp, the timestamp used to read from the oplog with // forward cursors. The timestamp is used to hide oplog entries that might be committed but have // uncommitted entries behind them. This prevents cursors from seeing 'holes' in the oplog and diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_prepare_conflict_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_prepare_conflict_test.cpp index aabbcbfc369..cb94e2a20e3 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_prepare_conflict_test.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_prepare_conflict_test.cpp @@ -69,12 +69,6 @@ public: kvEngine = makeKVEngine(serviceContext, home.path(), &cs); opCtx->setRecoveryUnit(std::unique_ptr<RecoveryUnit>(kvEngine->newRecoveryUnit()), WriteUnitOfWork::RecoveryUnitState::kNotInUnitOfWork); - - // Sets internal states to pass invariants inside 'wiredTigerPrepareConflictRetry()'. - { - stdx::lock_guard<Client> lk(*client); - client->setSystemOperationKillableByStepdown(lk); - } } unittest::TempDir home{"temp"}; diff --git a/src/mongo/db/transaction/internal_transactions_reap_service.cpp b/src/mongo/db/transaction/internal_transactions_reap_service.cpp index 90dc23baffe..b547a54b6a7 100644 --- a/src/mongo/db/transaction/internal_transactions_reap_service.cpp +++ b/src/mongo/db/transaction/internal_transactions_reap_service.cpp @@ -114,10 +114,6 @@ void InternalTransactionsReapService::onShutdown() { void InternalTransactionsReapService::_reapInternalTransactions(ServiceContext* service) try { ThreadClient tc("reap-internal-transactions", service); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } auto uniqueOpCtx = tc->makeOperationContext(); auto opCtx = uniqueOpCtx.get(); opCtx->setAlwaysInterruptAtStepDownOrUp_UNSAFE(); diff --git a/src/mongo/db/transaction/transaction_api.cpp b/src/mongo/db/transaction/transaction_api.cpp index 627beebf9ea..6c3ed0f9c13 100644 --- a/src/mongo/db/transaction/transaction_api.cpp +++ b/src/mongo/db/transaction/transaction_api.cpp @@ -417,9 +417,6 @@ void primeInternalClient(Client* client) { if (as) { as->grantInternalAuthorization(client); } - - stdx::lock_guard<Client> lk(*client); - client->setSystemOperationKillableByStepdown(lk); } Future<DbResponse> DefaultSEPTransactionClientBehaviors::handleRequest( diff --git a/src/mongo/db/transaction/transaction_participant.cpp b/src/mongo/db/transaction/transaction_participant.cpp index 25589761451..beb399820e8 100644 --- a/src/mongo/db/transaction/transaction_participant.cpp +++ b/src/mongo/db/transaction/transaction_participant.cpp @@ -572,6 +572,13 @@ TransactionParticipant::getOldestActiveTimestamp(Timestamp stableTimestamp) { // the server, and it both blocks this thread from querying config.transactions and waits for // this thread to terminate. auto client = getGlobalServiceContext()->makeClient("OldestActiveTxnTimestamp"); + + // TODO(SERVER-74656): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*client.get()); + client.get()->setSystemOperationUnkillableByStepdown(lk); + } + AlternativeClientRegion acr(client); try { @@ -2013,8 +2020,14 @@ void TransactionParticipant::Participant::_commitSplitPreparedTxnOnPrimary( auto splitClientOwned = userOpCtx->getServiceContext()->makeClient("tempSplitClient"); auto splitOpCtx = splitClientOwned->makeOperationContext(); - AlternativeClientRegion acr(splitClientOwned); + // TODO(SERVER-74656): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*splitClientOwned.get()); + splitClientOwned.get()->setSystemOperationUnkillableByStepdown(lk); + } + + AlternativeClientRegion acr(splitClientOwned); std::unique_ptr<MongoDSessionCatalog::Session> checkedOutSession; repl::UnreplicatedWritesBlock notReplicated(splitOpCtx.get()); @@ -2236,8 +2249,14 @@ void TransactionParticipant::Participant::_abortSplitPreparedTxnOnPrimary( auto splitClientOwned = opCtx->getServiceContext()->makeClient("tempSplitClient"); auto splitOpCtx = splitClientOwned->makeOperationContext(); - AlternativeClientRegion acr(splitClientOwned); + // TODO(SERVER-74656): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*splitClientOwned.get()); + splitClientOwned.get()->setSystemOperationUnkillableByStepdown(lk); + } + + AlternativeClientRegion acr(splitClientOwned); std::unique_ptr<MongoDSessionCatalog::Session> checkedOutSession; repl::UnreplicatedWritesBlock notReplicated(splitOpCtx.get()); diff --git a/src/mongo/db/ttl.cpp b/src/mongo/db/ttl.cpp index 8557eba416b..b2a94b47ad1 100644 --- a/src/mongo/db/ttl.cpp +++ b/src/mongo/db/ttl.cpp @@ -352,11 +352,6 @@ void TTLMonitor::run() { ThreadClient tc(name(), getGlobalServiceContext()); AuthorizationSession::get(cc())->grantInternalAuthorization(&cc()); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc.get()->setSystemOperationKillableByStepdown(lk); - } - while (true) { { auto startTime = Date_t::now(); @@ -617,11 +612,6 @@ bool TTLMonitor::_doTTLIndexDelete(OperationContext* opCtx, ExecutorFuture<void>(executor) .then([serviceContext = opCtx->getServiceContext(), nss, staleInfo] { ThreadClient tc("TTLShardVersionRecovery", serviceContext); - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } - auto uniqueOpCtx = tc->makeOperationContext(); auto opCtx = uniqueOpCtx.get(); diff --git a/src/mongo/db/vector_clock_mongod.cpp b/src/mongo/db/vector_clock_mongod.cpp index bee92fd5d2d..4ea2f7a2a5f 100644 --- a/src/mongo/db/vector_clock_mongod.cpp +++ b/src/mongo/db/vector_clock_mongod.cpp @@ -373,12 +373,6 @@ Future<void> VectorClockMongoD::_doWhileQueueNotEmptyOrError(ServiceContext* ser }(); ThreadClient tc("VectorClockStateOperation", service); - - { - stdx::lock_guard<Client> lk(*tc.get()); - tc->setSystemOperationKillableByStepdown(lk); - } - const auto opCtxHolder = tc->makeOperationContext(); auto* const opCtx = opCtxHolder.get(); diff --git a/src/mongo/embedded/embedded.cpp b/src/mongo/embedded/embedded.cpp index c1776683e7f..ee0fca37913 100644 --- a/src/mongo/embedded/embedded.cpp +++ b/src/mongo/embedded/embedded.cpp @@ -158,6 +158,12 @@ void shutdown(ServiceContext* srvContext) { auto const client = Client::getCurrent(); auto const serviceContext = client->getServiceContext(); + // TODO(SERVER-74659): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + serviceContext->setKillAllOperations(); // We should always be able to acquire the global lock at shutdown. @@ -202,6 +208,13 @@ ServiceContext* initialize(const char* yaml_config) { setGlobalServiceContext(ServiceContext::make()); Client::initThread("initandlisten"); + + // TODO(SERVER-74659): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + // Make sure current thread have no client set in thread_local when we leave this function ScopeGuard clientGuard([] { Client::releaseCurrent(); }); diff --git a/src/mongo/embedded/stitch_support/stitch_support.cpp b/src/mongo/embedded/stitch_support/stitch_support.cpp index b0c56833f0f..8141083d795 100644 --- a/src/mongo/embedded/stitch_support/stitch_support.cpp +++ b/src/mongo/embedded/stitch_support/stitch_support.cpp @@ -336,8 +336,14 @@ stitch_support_v1_matcher* matcher_create(stitch_support_v1_lib* const lib, "is not yet initialized."}; } - return new stitch_support_v1_matcher( - lib->serviceContext->makeClient("stitch_support"), filter, collator); + auto client = lib->serviceContext->makeClient("stitch_support"); + // TODO(SERVER-74662): Please revisit if this thread could be made killable. + { + stdx::lock_guard<mongo::Client> lk(*client.get()); + client.get()->setSystemOperationUnkillableByStepdown(lk); + } + + return new stitch_support_v1_matcher(std::move(client), filter, collator); } stitch_support_v1_projection* projection_create(stitch_support_v1_lib* const lib, @@ -356,9 +362,14 @@ stitch_support_v1_projection* projection_create(stitch_support_v1_lib* const lib "Library is not yet initialized."}; } + auto client = lib->serviceContext->makeClient("stitch_support"); + // TODO(SERVER-74662): Please revisit if this thread could be made killable. + { + stdx::lock_guard<mongo::Client> lk(*client.get()); + client.get()->setSystemOperationUnkillableByStepdown(lk); + } - return new stitch_support_v1_projection( - lib->serviceContext->makeClient("stitch_support"), spec, matcher, collator); + return new stitch_support_v1_projection(std::move(client), spec, matcher, collator); } stitch_support_v1_update* update_create(stitch_support_v1_lib* const lib, @@ -378,11 +389,15 @@ stitch_support_v1_update* update_create(stitch_support_v1_lib* const lib, "Cannot create a new udpate when the Stitch Support Library is not yet initialized."}; } - return new stitch_support_v1_update(lib->serviceContext->makeClient("stitch_support"), - updateExpr, - arrayFilters, - matcher, - collator); + auto client = lib->serviceContext->makeClient("stitch_support"); + // TODO(SERVER-74662): Please revisit if this thread could be made killable. + { + stdx::lock_guard<mongo::Client> lk(*client.get()); + client.get()->setSystemOperationUnkillableByStepdown(lk); + } + + return new stitch_support_v1_update( + std::move(client), updateExpr, arrayFilters, matcher, collator); } int capi_status_get_error(const stitch_support_v1_status* const status) noexcept { diff --git a/src/mongo/idl/cluster_server_parameter_refresher.cpp b/src/mongo/idl/cluster_server_parameter_refresher.cpp index e711ef20c1c..d19cf7248fb 100644 --- a/src/mongo/idl/cluster_server_parameter_refresher.cpp +++ b/src/mongo/idl/cluster_server_parameter_refresher.cpp @@ -64,6 +64,11 @@ getFCVAndClusterParametersFromConfigServer() { // Use an alternative client region, because we call refreshParameters both from the internal // refresher process and from getClusterParameter. auto altClient = getGlobalServiceContext()->makeClient("clusterParameterRefreshTransaction"); + // TODO(SERVER-74660): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*altClient.get()); + altClient.get()->setSystemOperationUnkillableByStepdown(lk); + } AlternativeClientRegion clientRegion(altClient); auto opCtx = cc().makeOperationContext(); auto as = AuthorizationSession::get(cc()); @@ -300,7 +305,9 @@ void ClusterServerParameterRefresher::start(ServiceContext* serviceCtx, Operatio PeriodicRunner::PeriodicJob job( "ClusterServerParameterRefresher", [serviceCtx](Client* client) { getClusterServerParameterRefresher(serviceCtx)->run(); }, - loadInterval()); + loadInterval(), + // TODO(SERVER-74659): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/); refresher->_job = std::make_unique<PeriodicJobAnchor>(periodicRunner->makeJob(std::move(job))); diff --git a/src/mongo/s/client/shard_registry.cpp b/src/mongo/s/client/shard_registry.cpp index 86eff1f56b8..64b4ca8828c 100644 --- a/src/mongo/s/client/shard_registry.cpp +++ b/src/mongo/s/client/shard_registry.cpp @@ -462,6 +462,12 @@ void ShardRegistry::updateReplicaSetOnConfigServer(ServiceContext* serviceContex const ConnectionString& connStr) noexcept { ThreadClient tc("UpdateReplicaSetOnConfigServer", serviceContext); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + auto opCtx = tc->makeOperationContext(); auto const grid = Grid::get(opCtx.get()); auto sr = grid->shardRegistry(); diff --git a/src/mongo/s/collection_uuid_mismatch.cpp b/src/mongo/s/collection_uuid_mismatch.cpp index 4df6f92067e..73152d663bd 100644 --- a/src/mongo/s/collection_uuid_mismatch.cpp +++ b/src/mongo/s/collection_uuid_mismatch.cpp @@ -50,6 +50,13 @@ Status populateCollectionUUIDMismatch(OperationContext* opCtx, // The listCollections command cannot be run in multi-document transactions, so run it using an // alternative client. auto client = opCtx->getServiceContext()->makeClient("populateCollectionUUIDMismatch"); + + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<mongo::Client> lk(*client.get()); + client.get()->setSystemOperationUnkillableByStepdown(lk); + } + auto alternativeOpCtx = client->makeOperationContext(); opCtx = alternativeOpCtx.get(); AlternativeClientRegion acr{client}; diff --git a/src/mongo/s/config_server_catalog_cache_loader.cpp b/src/mongo/s/config_server_catalog_cache_loader.cpp index a9706b68a67..8cfb4a0e5b9 100644 --- a/src/mongo/s/config_server_catalog_cache_loader.cpp +++ b/src/mongo/s/config_server_catalog_cache_loader.cpp @@ -131,6 +131,13 @@ SemiFuture<CollectionAndChangedChunks> ConfigServerCatalogCacheLoader::getChunks .then([=]() { ThreadClient tc("ConfigServerCatalogCacheLoader::getChunksSince", getGlobalServiceContext()); + + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + auto opCtx = tc->makeOperationContext(); return getChangedChunks(opCtx.get(), nss, version); @@ -143,6 +150,13 @@ SemiFuture<DatabaseType> ConfigServerCatalogCacheLoader::getDatabase(StringData .then([name = dbName.toString()] { ThreadClient tc("ConfigServerCatalogCacheLoader::getDatabase", getGlobalServiceContext()); + + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + auto opCtx = tc->makeOperationContext(); return Grid::get(opCtx.get()) ->catalogClient() diff --git a/src/mongo/s/mongos_main.cpp b/src/mongo/s/mongos_main.cpp index 9d3a1dd0870..c6d086c17ba 100644 --- a/src/mongo/s/mongos_main.cpp +++ b/src/mongo/s/mongos_main.cpp @@ -230,6 +230,11 @@ void implicitlyAbortAllTransactions(OperationContext* opCtx) { }); auto newClient = opCtx->getServiceContext()->makeClient("ImplicitlyAbortTxnAtShutdown"); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<mongo::Client> lk(*newClient.get()); + newClient.get()->setSystemOperationUnkillableByStepdown(lk); + } AlternativeClientRegion acr(newClient); Status shutDownStatus(ErrorCodes::InterruptedAtShutdown, @@ -265,8 +270,15 @@ void cleanupTask(const ShutdownTaskArgs& shutdownArgs) { { // This client initiation pattern is only to be used here, with plans to eliminate this // pattern down the line. - if (!haveClient()) + if (!haveClient()) { Client::initThread(getThreadName()); + + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + } Client& client = cc(); ServiceContext::UniqueOperationContext uniqueTxn; @@ -677,6 +689,12 @@ private: ExitCode runMongosServer(ServiceContext* serviceContext) { ThreadClient tc("mongosMain", serviceContext); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + logMongosVersionInfo(nullptr); // Set up the periodic runner for background job execution diff --git a/src/mongo/s/query/cluster_cursor_cleanup_job.cpp b/src/mongo/s/query/cluster_cursor_cleanup_job.cpp index 005ad8908b8..f70e14e0dc8 100644 --- a/src/mongo/s/query/cluster_cursor_cleanup_job.cpp +++ b/src/mongo/s/query/cluster_cursor_cleanup_job.cpp @@ -50,6 +50,12 @@ std::string ClusterCursorCleanupJob::name() const { void ClusterCursorCleanupJob::run() { ThreadClient tc(name(), getGlobalServiceContext()); + // TODO(SERVER-74662): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + auto* const client = Client::getCurrent(); auto* const manager = Grid::get(client->getServiceContext())->getCursorManager(); invariant(manager); diff --git a/src/mongo/s/query_analysis_sampler.cpp b/src/mongo/s/query_analysis_sampler.cpp index 609134829c4..7f680dfd9a6 100644 --- a/src/mongo/s/query_analysis_sampler.cpp +++ b/src/mongo/s/query_analysis_sampler.cpp @@ -122,7 +122,9 @@ void QueryAnalysisSampler::onStartup() { PeriodicRunner::PeriodicJob queryStatsRefresherJob( "QueryAnalysisQueryStatsRefresher", [this](Client* client) { _refreshQueryStats(); }, - Seconds(1)); + Seconds(1), + // TODO(SERVER-74662): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/); _periodicQueryStatsRefresher = periodicRunner->makeJob(std::move(queryStatsRefresherJob)); _periodicQueryStatsRefresher.start(); @@ -139,7 +141,9 @@ void QueryAnalysisSampler::onStartup() { "error"_attr = redact(ex)); } }, - Seconds(gQueryAnalysisSamplerConfigurationRefreshSecs)); + Seconds(gQueryAnalysisSamplerConfigurationRefreshSecs), + // TODO(SERVER-74662): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/); _periodicConfigurationsRefresher = periodicRunner->makeJob(std::move(configurationsRefresherJob)); _periodicConfigurationsRefresher.start(); diff --git a/src/mongo/s/sharding_uptime_reporter.cpp b/src/mongo/s/sharding_uptime_reporter.cpp index 011a1c6f74d..14f827b46be 100644 --- a/src/mongo/s/sharding_uptime_reporter.cpp +++ b/src/mongo/s/sharding_uptime_reporter.cpp @@ -119,6 +119,12 @@ void ShardingUptimeReporter::startPeriodicThread() { _thread = stdx::thread([created] { Client::initThread("Uptime-reporter"); + // TODO(SERVER-74658): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + const std::string hostName(getHostNameCached()); const std::string instanceId(constructInstanceIdString(hostName)); const Timer upTimeTimer; diff --git a/src/mongo/scripting/mozjs/jsthread.cpp b/src/mongo/scripting/mozjs/jsthread.cpp index caa010f7083..be698fc82ae 100644 --- a/src/mongo/scripting/mozjs/jsthread.cpp +++ b/src/mongo/scripting/mozjs/jsthread.cpp @@ -193,6 +193,13 @@ private: MozJSImplScope scope(static_cast<MozJSScriptEngine*>(getGlobalScriptEngine()), boost::none /* Don't override global jsHeapLimitMB */); Client::initThread("js"); + + // TODO(SERVER-74662): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + scope.setParentStack(thisv->_sharedData->_stack); thisv->_sharedData->_returnData = scope.callThreadArgs(thisv->_sharedData->_args); } catch (...) { diff --git a/src/mongo/scripting/mozjs/proxyscope.cpp b/src/mongo/scripting/mozjs/proxyscope.cpp index 1adc91f78d3..23d3194799c 100644 --- a/src/mongo/scripting/mozjs/proxyscope.cpp +++ b/src/mongo/scripting/mozjs/proxyscope.cpp @@ -364,9 +364,16 @@ void MozJSProxyScope::shutdownThread() { * break out of the loop and return. */ void MozJSProxyScope::implThread(MozJSProxyScope* proxy) { - if (hasGlobalServiceContext()) + if (hasGlobalServiceContext()) { Client::initThread("js"); + // TODO(SERVER-74662): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); + } + } + std::unique_ptr<MozJSImplScope> scope; // This will leave _status set for the first noop runOnImplThread(), which diff --git a/src/mongo/transport/grpc/grpc_transport_layer.cpp b/src/mongo/transport/grpc/grpc_transport_layer.cpp index aed304f904b..83f21794287 100644 --- a/src/mongo/transport/grpc/grpc_transport_layer.cpp +++ b/src/mongo/transport/grpc/grpc_transport_layer.cpp @@ -66,7 +66,9 @@ Status GRPCTransportLayer::start() try { [pool = _channelPool](Client*) { pool->dropIdleChannels(GRPCTransportLayer::kDefaultChannelTimeout); }, - kDefaultChannelTimeout); + kDefaultChannelTimeout, + // TODO(SERVER-74659): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/); invariant(!_idleChannelPruner); _idleChannelPruner.emplace(_svcCtx->getPeriodicRunner()->makeJob(std::move(prunerJob))); _idleChannelPruner->start(); diff --git a/src/mongo/util/diagnostic_info.cpp b/src/mongo/util/diagnostic_info.cpp index eba04ab4c37..d395aa7ff05 100644 --- a/src/mongo/util/diagnostic_info.cpp +++ b/src/mongo/util/diagnostic_info.cpp @@ -104,6 +104,12 @@ void BlockedOp::start(ServiceContext* serviceContext) { _latchState.thread = stdx::thread([this, serviceContext]() mutable { ThreadClient tc("DiagnosticCaptureTestLatch", serviceContext); + // TODO(SERVER-74659): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + LOGV2(23123, "Entered currentOpSpawnsThreadWaitingForLatch thread"); stdx::lock_guard testLock(_latchState.mutex); @@ -113,6 +119,13 @@ void BlockedOp::start(ServiceContext* serviceContext) { _interruptibleState.thread = stdx::thread([this, serviceContext]() mutable { ThreadClient tc("DiagnosticCaptureTestInterruptible", serviceContext); + + // TODO(SERVER-74659): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + auto opCtx = tc->makeOperationContext(); LOGV2(23125, "Entered currentOpSpawnsThreadWaitingForLatch thread for interruptibles"); diff --git a/src/mongo/util/net/ocsp/ocsp_manager.cpp b/src/mongo/util/net/ocsp/ocsp_manager.cpp index 1524d1da560..e199990ce43 100644 --- a/src/mongo/util/net/ocsp/ocsp_manager.cpp +++ b/src/mongo/util/net/ocsp/ocsp_manager.cpp @@ -48,6 +48,10 @@ auto makeTaskExecutor() { tpOptions.maxThreads = 10; tpOptions.onCreateThread = [](const std::string& threadName) { Client::initThread(threadName.c_str()); + + // TODO(SERVER-74660): Please revisit if this thread could be made killable. + stdx::lock_guard<Client> lk(cc()); + cc().setSystemOperationUnkillableByStepdown(lk); }; return std::make_unique<ThreadPool>(tpOptions); } diff --git a/src/mongo/util/net/private/ssl_expiration.cpp b/src/mongo/util/net/private/ssl_expiration.cpp index 8ba42b85ed8..fd3915ab1d0 100644 --- a/src/mongo/util/net/private/ssl_expiration.cpp +++ b/src/mongo/util/net/private/ssl_expiration.cpp @@ -63,7 +63,11 @@ void CertificateExpirationMonitor::start(ServiceContext* service) { invariant(periodicRunner); PeriodicRunner::PeriodicJob job( - "CertificateExpirationMonitor", [this](Client* client) { return run(client); }, oneDay); + "CertificateExpirationMonitor", + [this](Client* client) { return run(client); }, + oneDay, + // TODO(SERVER-74660): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/); _job = std::make_unique<PeriodicJobAnchor>(periodicRunner->makeJob(std::move(job))); _job->start(); diff --git a/src/mongo/util/net/ssl_manager_openssl.cpp b/src/mongo/util/net/ssl_manager_openssl.cpp index 076c054d09c..8ad516e3dc7 100644 --- a/src/mongo/util/net/ssl_manager_openssl.cpp +++ b/src/mongo/util/net/ssl_manager_openssl.cpp @@ -2285,7 +2285,9 @@ void OCSPFetcher::startPeriodicJob(StatusWith<Milliseconds> swDurationInitial) { getGlobalServiceContext()->getPeriodicRunner()->makeJob(PeriodicRunner::PeriodicJob( "OCSP Fetch and Staple", [this, sm = _manager->shared_from_this()](Client* client) { doPeriodicJob(); }, - getPeriodForStapleJob(swDurationInitial))); + getPeriodForStapleJob(swDurationInitial), + // TODO(SERVER-74660): Please revisit if this periodic job could be made killable. + false /*isKillableByStepdown*/)); _ocspStaplingAnchor.start(); } diff --git a/src/mongo/util/periodic_runner.h b/src/mongo/util/periodic_runner.h index d7ecb1c7f1b..0373d8ed0dc 100644 --- a/src/mongo/util/periodic_runner.h +++ b/src/mongo/util/periodic_runner.h @@ -60,8 +60,11 @@ public: using JobAnchor = PeriodicJobAnchor; struct PeriodicJob { - PeriodicJob(std::string name, Job callable, Milliseconds period) - : name(std::move(name)), job(std::move(callable)), interval(period) {} + PeriodicJob(std::string name, Job callable, Milliseconds period, bool isKillableByStepdown) + : name(std::move(name)), + job(std::move(callable)), + interval(period), + isKillableByStepdown(isKillableByStepdown) {} /** * name of the job @@ -77,6 +80,11 @@ public: * An interval at which the job should be run. */ Milliseconds interval; + + /** + * Whether this job is killable during stepdown. + */ + bool isKillableByStepdown; }; /** diff --git a/src/mongo/util/periodic_runner_impl.cpp b/src/mongo/util/periodic_runner_impl.cpp index 6d66d5f643d..b4fc892999b 100644 --- a/src/mongo/util/periodic_runner_impl.cpp +++ b/src/mongo/util/periodic_runner_impl.cpp @@ -75,6 +75,12 @@ void PeriodicRunnerImpl::PeriodicJobImpl::_run() { ON_BLOCK_EXIT([this] { _stopPromise.emplaceValue(); }); ThreadClient client(_job.name, _serviceContext, nullptr); + + if (!_job.isKillableByStepdown) { + stdx::lock_guard<Client> lk(*client.get()); + client.get()->setSystemOperationUnkillableByStepdown(lk); + } + { // This ensures client object is not destructed so long as others can access it. ON_BLOCK_EXIT([this] { diff --git a/src/mongo/util/periodic_runner_impl_test.cpp b/src/mongo/util/periodic_runner_impl_test.cpp index 8985414f418..6b90b5d46a2 100644 --- a/src/mongo/util/periodic_runner_impl_test.cpp +++ b/src/mongo/util/periodic_runner_impl_test.cpp @@ -77,7 +77,7 @@ public: auto makeStoppedJob() { PeriodicRunner::PeriodicJob job( - "job", [](Client* client) {}, Seconds{1}); + "job", [](Client* client) {}, Seconds{1}, false); auto jobAnchor = runner().makeJob(std::move(job)); jobAnchor.start(); jobAnchor.stop(); @@ -102,7 +102,8 @@ TEST_F(PeriodicRunnerImplTest, OneJobTest) { } cv.notify_all(); }, - interval); + interval, + false); auto jobAnchor = runner().makeJob(std::move(job)); jobAnchor.start(); @@ -136,7 +137,8 @@ TEST_F(PeriodicRunnerImplTest, OnePausableJobDoesNotRunWithoutStart) { } cv.notify_all(); }, - interval); + interval, + false); auto jobAnchor = runner().makeJob(std::move(job)); clockSource().advance(interval); @@ -162,7 +164,8 @@ TEST_F(PeriodicRunnerImplTest, OnePausableJobRunsCorrectlyWithStart) { } cv.notify_all(); }, - interval); + interval, + false); auto jobAnchor = runner().makeJob(std::move(job)); jobAnchor.start(); @@ -198,7 +201,8 @@ TEST_F(PeriodicRunnerImplTest, OnePausableJobPausesCorrectly) { } cv.notify_all(); }, - interval); + interval, + false); auto jobAnchor = runner().makeJob(std::move(job)); jobAnchor.start(); @@ -241,7 +245,8 @@ TEST_F(PeriodicRunnerImplTest, OnePausableJobResumesCorrectly) { } cv.notify_all(); }, - interval); + interval, + false); auto jobAnchor = runner().makeJob(std::move(job)); jobAnchor.start(); @@ -309,7 +314,8 @@ TEST_F(PeriodicRunnerImplTest, TwoJobsTest) { } cv.notify_all(); }, - intervalA); + intervalA, + false); PeriodicRunner::PeriodicJob jobB( "job", @@ -320,7 +326,8 @@ TEST_F(PeriodicRunnerImplTest, TwoJobsTest) { } cv.notify_all(); }, - intervalB); + intervalB, + false); auto jobAnchorA = runner().makeJob(std::move(jobA)); auto jobAnchorB = runner().makeJob(std::move(jobB)); @@ -357,7 +364,8 @@ TEST_F(PeriodicRunnerImplTest, TwoJobsDontDeadlock) { cv.wait(lk, [&] { return b; }); doneCv.notify_one(); }, - Milliseconds(1)); + Milliseconds(1), + false); PeriodicRunner::PeriodicJob jobB( "job", @@ -369,7 +377,8 @@ TEST_F(PeriodicRunnerImplTest, TwoJobsDontDeadlock) { cv.wait(lk, [&] { return a; }); doneCv.notify_one(); }, - Milliseconds(1)); + Milliseconds(1), + false); auto jobAnchorA = runner().makeJob(std::move(jobA)); auto jobAnchorB = runner().makeJob(std::move(jobB)); @@ -406,7 +415,8 @@ TEST_F(PeriodicRunnerImplTest, ChangingIntervalWorks) { } cv.notify_one(); }, - Milliseconds(5)); + Milliseconds(5), + false); auto jobAnchor = runner().makeJob(std::move(job)); jobAnchor.start(); @@ -485,7 +495,8 @@ TEST_F(PeriodicRunnerImplTest, StopProperlyInterruptsOpCtx) { MONGO_UNREACHABLE; }, - interval); + interval, + false); auto jobAnchor = runner().makeJob(std::move(job)); jobAnchor.start(); diff --git a/src/mongo/util/read_through_cache.cpp b/src/mongo/util/read_through_cache.cpp index 9853b3a36af..2dfa8056a8d 100644 --- a/src/mongo/util/read_through_cache.cpp +++ b/src/mongo/util/read_through_cache.cpp @@ -78,6 +78,13 @@ ReadThroughCacheBase::CancelToken ReadThroughCacheBase::_asyncWork( } ThreadClient tc(taskInfo->service); + + // TODO(SERVER-74659): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*tc.get()); + tc.get()->setSystemOperationUnkillableByStepdown(lk); + } + auto opCtxHolder = tc->makeOperationContext(); cancelStatusAtTaskBegin = [&] { diff --git a/src/mongo/watchdog/watchdog.cpp b/src/mongo/watchdog/watchdog.cpp index 227291f4784..83c6fc70317 100644 --- a/src/mongo/watchdog/watchdog.cpp +++ b/src/mongo/watchdog/watchdog.cpp @@ -130,6 +130,12 @@ void WatchdogPeriodicThread::doLoop() { Client::initThread(_threadName); Client* client = &cc(); + // TODO(SERVER-74659): Please revisit if this thread could be made killable. + { + stdx::lock_guard<Client> lk(*client); + client->setSystemOperationUnkillableByStepdown(lk); + } + auto preciseClockSource = client->getServiceContext()->getPreciseClockSource(); { |