From 7afcf9895eb7da296f602604e2973cdb6fa0c67f Mon Sep 17 00:00:00 2001 From: Benety Goh Date: Thu, 9 Jul 2020 14:49:06 -0400 Subject: SERVER-48617 do not hold global lock while waiting for index build thread resource --- src/mongo/db/index_builds_coordinator_mongod.cpp | 28 ++++++++++++++---------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp index 2a2fd13169e..5a3f9061e90 100644 --- a/src/mongo/db/index_builds_coordinator_mongod.cpp +++ b/src/mongo/db/index_builds_coordinator_mongod.cpp @@ -128,19 +128,25 @@ IndexBuildsCoordinatorMongod::startIndexBuild(OperationContext* opCtx, // Only operations originating from user connections need to wait while there are more than // 'maxNumActiveUserIndexBuilds' index builds currently running. if (opCtx->getClient()->isFromUserConnection()) { - // Need to follow the locking order here by getting the global lock first followed by - // the mutex. The global lock acquires the RSTL lock which we use to assert that we're - // the primary node when running user operations. - ShouldNotConflictWithSecondaryBatchApplicationBlock shouldNotConflictBlock( - opCtx->lockState()); - Lock::GlobalLock globalLk(opCtx, MODE_IX); + { + // The global lock acquires the RSTL lock which we use to assert that we're the + // primary node when running user operations. Additionally, releasing this lock + // allows the node to step down after we have checked the replication state. If this + // node steps down after this check, similar assertions will cause the index build + // to fail later on when locks are reacquired. Therefore, this assertion is not + // required for correctness, but only intended to rate limit index builds started on + // primaries. + ShouldNotConflictWithSecondaryBatchApplicationBlock shouldNotConflictBlock( + opCtx->lockState()); + Lock::GlobalLock globalLk(opCtx, MODE_IX); + + auto replCoord = repl::ReplicationCoordinator::get(opCtx); + uassert(ErrorCodes::NotMaster, + "Not primary while waiting to start an index build", + replCoord->canAcceptWritesFor(opCtx, nssOrUuid)); + } stdx::unique_lock lk(_mutex); - - auto replCoord = repl::ReplicationCoordinator::get(opCtx); - uassert(ErrorCodes::NotMaster, - "Not primary while waiting to start an index build", - replCoord->canAcceptWritesFor(opCtx, nssOrUuid)); opCtx->waitForConditionOrInterrupt(_indexBuildFinished, lk, [&] { const int maxActiveBuilds = maxNumActiveUserIndexBuilds.load(); if (_numActiveIndexBuilds < maxActiveBuilds) { -- cgit v1.2.1