diff options
author | Gregory Wlodarek <gregory.wlodarek@mongodb.com> | 2020-04-01 18:08:04 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-04-02 00:16:18 +0000 |
commit | c7818dde1a910b6828f24e1e9670688bdd5e5a68 (patch) | |
tree | 0d00317fe2ca8112eb8f871f249195ffb7d93626 | |
parent | b3d8a4c33e27f8d4536e6526d2ebb44d289b89c4 (diff) | |
download | mongo-c7818dde1a910b6828f24e1e9670688bdd5e5a68.tar.gz |
SERVER-47155 Limit the number of simultaneous index builds running from user connections to 3
-rw-r--r-- | src/mongo/db/index_builds_coordinator_mongod.cpp | 89 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator_mongod.h | 7 | ||||
-rw-r--r-- | src/mongo/db/storage/two_phase_index_build_knobs.idl | 15 |
3 files changed, 101 insertions, 10 deletions
diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp index 2eda872cbea..48be752b76f 100644 --- a/src/mongo/db/index_builds_coordinator_mongod.cpp +++ b/src/mongo/db/index_builds_coordinator_mongod.cpp @@ -60,6 +60,8 @@ namespace { MONGO_FAIL_POINT_DEFINE(hangBeforeInitializingIndexBuild); MONGO_FAIL_POINT_DEFINE(hangAfterInitializingIndexBuild); +const StringData kMaxNumActiveUserIndexBuildsServerParameterName = "maxNumActiveUserIndexBuilds"_sd; + /** * Constructs the options for the loader thread pool. */ @@ -67,10 +69,13 @@ ThreadPool::Options makeDefaultThreadPoolOptions() { ThreadPool::Options options; options.poolName = "IndexBuildsCoordinatorMongod"; options.minThreads = 0; - // We depend on thread pool sizes being equal between primaries and secondaries. If a secondary - // has fewer resources than a primary, index build oplog entries can replicate in an order that - // the secondary is unable to fulfill, leading to deadlocks. See SERVER-44250. - options.maxThreads = 3; + // Both the primary and secondary nodes will have an unlimited thread pool size. This is done to + // allow secondary nodes to startup as many index builders as necessary in order to prevent + // scheduling deadlocks during initial sync or oplog application. When commands are run from + // user connections that need to create indexes, those commands will hang until there are less + // than 'maxNumActiveUserIndexBuilds' running index build threads, or until the operation is + // interrupted. + options.maxThreads = ThreadPool::Options::kUnlimited; // Ensure all threads have a client. options.onCreateThread = [](const std::string& threadName) { @@ -85,10 +90,18 @@ ThreadPool::Options makeDefaultThreadPoolOptions() { IndexBuildsCoordinatorMongod::IndexBuildsCoordinatorMongod() : _threadPool(makeDefaultThreadPoolOptions()) { _threadPool.startup(); -} -IndexBuildsCoordinatorMongod::IndexBuildsCoordinatorMongod(ThreadPool::Options options) - : _threadPool(std::move(options)) { - _threadPool.startup(); + + // Change the 'setOnUpdate' function for the server parameter to signal the condition variable + // when the value changes. + ServerParameter* serverParam = + ServerParameterSet::getGlobal()->get(kMaxNumActiveUserIndexBuildsServerParameterName); + static_cast< + IDLServerParameterWithStorage<ServerParameterType::kStartupAndRuntime, AtomicWord<int>>*>( + serverParam) + ->setOnUpdate([this](const int) -> Status { + _indexBuildFinished.notify_all(); + return Status::OK(); + }); } void IndexBuildsCoordinatorMongod::shutdown() { @@ -110,6 +123,55 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx, const UUID& buildUUID, IndexBuildProtocol protocol, IndexBuildOptions indexBuildOptions) { + const NamespaceStringOrUUID nssOrUuid{dbName, collectionUUID}; + + { + // Only operations originating from user connections need to wait while there are more than + // 'maxNumActiveUserIndexBuilds' index builds currently running. + if (opCtx->getClient()->isFromUserConnection()) { + // Need to follow the locking order here by getting the global lock first followed by + // the mutex. The global lock acquires the RSTL lock which we use to assert that we're + // the primary node when running user operations. + ShouldNotConflictWithSecondaryBatchApplicationBlock shouldNotConflictBlock( + opCtx->lockState()); + Lock::GlobalLock globalLk(opCtx, MODE_IX); + + stdx::unique_lock<Latch> lk(_mutex); + + auto replCoord = repl::ReplicationCoordinator::get(opCtx); + uassert(ErrorCodes::NotMaster, + "Not primary while waiting to start an index build", + replCoord->canAcceptWritesFor(opCtx, nssOrUuid)); + opCtx->waitForConditionOrInterrupt(_indexBuildFinished, lk, [&] { + const int maxActiveBuilds = maxNumActiveUserIndexBuilds.load(); + if (_numActiveIndexBuilds < maxActiveBuilds) { + _numActiveIndexBuilds++; + return true; + } + + LOGV2(4715500, + "Too many index builds running simultaneously, waiting until the number of " + "active index builds is below the threshold", + "numActiveIndexBuilds"_attr = _numActiveIndexBuilds, + "maxNumActiveUserIndexBuilds"_attr = maxActiveBuilds, + "indexSpecs"_attr = specs, + "buildUUID"_attr = buildUUID, + "collectionUUID"_attr = collectionUUID); + return false; + }); + } else { + // System index builds have no limit and never wait, but do consume a slot. + stdx::unique_lock<Latch> lk(_mutex); + _numActiveIndexBuilds++; + } + } + + auto onScopeExitGuard = makeGuard([&] { + stdx::unique_lock<Latch> lk(_mutex); + _numActiveIndexBuilds--; + _indexBuildFinished.notify_one(); + }); + if (indexBuildOptions.twoPhaseRecovery) { // Two phase index build recovery goes though a different set-up procedure because the // original index will be dropped first. @@ -150,7 +212,6 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx, const auto deadline = opCtx->getDeadline(); const auto timeoutError = opCtx->getTimeoutError(); - const NamespaceStringOrUUID nssOrUuid{dbName, collectionUUID}; const auto nss = CollectionCatalog::get(opCtx).resolveNamespaceStringOrUUID(opCtx, nssOrUuid); const auto& oss = OperationShardingState::get(opCtx); @@ -179,6 +240,10 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx, auto replState = invariant(_getIndexBuild(buildUUID)); + + // The thread pool task will be responsible for signalling the condition variable when the index + // build thread is done running. + onScopeExitGuard.dismiss(); _threadPool.schedule([ this, buildUUID, @@ -195,6 +260,12 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx, shardVersion, dbVersion ](auto status) mutable noexcept { + auto onScopeExitGuard = makeGuard([&] { + stdx::unique_lock<Latch> lk(_mutex); + _numActiveIndexBuilds--; + _indexBuildFinished.notify_one(); + }); + // Clean up if we failed to schedule the task. if (!status.isOK()) { stdx::unique_lock<Latch> lk(_mutex); diff --git a/src/mongo/db/index_builds_coordinator_mongod.h b/src/mongo/db/index_builds_coordinator_mongod.h index 8dd2335dd83..7596a4ddb97 100644 --- a/src/mongo/db/index_builds_coordinator_mongod.h +++ b/src/mongo/db/index_builds_coordinator_mongod.h @@ -55,7 +55,6 @@ public: * Sets up the thread pool. */ IndexBuildsCoordinatorMongod(); - IndexBuildsCoordinatorMongod(ThreadPool::Options options); /** * Shuts down the thread pool, signals interrupt to all index builds, then waits for all of the @@ -160,6 +159,12 @@ private: // Thread pool on which index builds are run. ThreadPool _threadPool; + + // Protected by _mutex. + int _numActiveIndexBuilds = 0; + + // Condition signalled to indicate that an index build thread finished executing. + stdx::condition_variable _indexBuildFinished; }; } // namespace mongo diff --git a/src/mongo/db/storage/two_phase_index_build_knobs.idl b/src/mongo/db/storage/two_phase_index_build_knobs.idl index dbcdc45d1e3..c2a176c91f5 100644 --- a/src/mongo/db/storage/two_phase_index_build_knobs.idl +++ b/src/mongo/db/storage/two_phase_index_build_knobs.idl @@ -46,3 +46,18 @@ server_parameters: cpp_vartype: bool cpp_varname: "enableIndexBuildCommitQuorum" default: true + + maxNumActiveUserIndexBuilds: + description: > + Specifies the maximum number of active user index builds that can be built simultaneously on + the primary node. Index builds initiated by the system are not subject to this limitation. + + Additionally, active index builds initiated by the system count towards the limit and can + delay scheduling user index builds even when the number of active user index builds is below + the limit. + set_at: [ startup, runtime ] + cpp_vartype: AtomicWord<int> + cpp_varname: maxNumActiveUserIndexBuilds + default: 3 + validator: + gte: 0 |