diff options
Diffstat (limited to 'src/mongo/db/index_builds_coordinator.cpp')
-rw-r--r-- | src/mongo/db/index_builds_coordinator.cpp | 155 |
1 files changed, 117 insertions, 38 deletions
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index de5004df03d..17fe7380a5d 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -94,7 +94,10 @@ MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeWaitingUntilMajorityOpTime); MONGO_FAIL_POINT_DEFINE(hangBeforeUnregisteringAfterCommit); MONGO_FAIL_POINT_DEFINE(failSetUpResumeIndexBuild); MONGO_FAIL_POINT_DEFINE(failIndexBuildWithError); +MONGO_FAIL_POINT_DEFINE(failIndexBuildWithErrorInSecondDrain); MONGO_FAIL_POINT_DEFINE(hangInRemoveIndexBuildEntryAfterCommitOrAbort); +MONGO_FAIL_POINT_DEFINE(hangAbortIndexBuildByBuildUUIDAfterLocks); + IndexBuildsCoordinator::IndexBuildsSSS::IndexBuildsSSS() : ServerStatusSection("indexBuilds"), @@ -1409,6 +1412,8 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx, AutoGetCollection indexBuildEntryColl( opCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX); + hangAbortIndexBuildByBuildUUIDAfterLocks.pauseWhileSet(); + // If we are using two-phase index builds and are no longer primary after receiving an // abort, we cannot replicate an abortIndexBuild oplog entry. Continue holding the RSTL to // check the replication state and to prevent any state transitions from happening while @@ -2157,7 +2162,8 @@ StatusWith<std::tuple<Lock::DBLock, repl::ReplicationStateTransitionLockGuard>> IndexBuildsCoordinator::_acquireExclusiveLockWithRSTLRetry(OperationContext* opCtx, ReplIndexBuildState* replState, - bool retry) { + bool retry, + bool collLockTimeout) { while (true) { // Skip the check for sharding's critical section check as it can only be acquired during a @@ -2168,8 +2174,20 @@ IndexBuildsCoordinator::_acquireExclusiveLockWithRSTLRetry(OperationContext* opC /*.skipRSTLLock=*/true}; Lock::DBLock dbLock{opCtx, replState->dbName, MODE_IX, Date_t::max(), lockOptions}; - CollectionNamespaceOrUUIDLock collLock{ - opCtx, {replState->dbName, replState->collectionUUID}, MODE_X}; + Date_t collLockDeadline = Date_t::max(); + if (collLockTimeout) { + collLockDeadline = Date_t::now() + Milliseconds{100}; + } + + boost::optional<CollectionNamespaceOrUUIDLock> collLock; + try { + collLock.emplace(opCtx, + NamespaceStringOrUUID{replState->dbName, replState->collectionUUID}, + MODE_X, + collLockDeadline); + } catch (const ExceptionFor<ErrorCodes::LockTimeout>& ex) { + return ex.toStatus(); + } // If we can't acquire the RSTL within a given time period, there is an active state // transition and we should release our locks and try again. We would otherwise introduce a @@ -2200,7 +2218,7 @@ IndexBuildsCoordinator::_acquireExclusiveLockWithRSTLRetry(OperationContext* opC continue; } - return std::make_tuple(std::move(dbLock), std::move(collLock), std::move(rstl)); + return std::make_tuple(std::move(dbLock), std::move(collLock.value()), std::move(rstl)); } } @@ -2648,18 +2666,38 @@ void IndexBuildsCoordinator::_cleanUpSinglePhaseAfterNonShutdownFailure( // would prevent us from taking locks. Use a new OperationContext to abort the index build. runOnAlternateContext( opCtx, "self-abort", [this, replState, status](OperationContext* abortCtx) { - ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(abortCtx->lockState()); - // Skip RSTL to avoid deadlocks with prepare conflicts and state transitions caused by - // taking a strong collection lock. See SERVER-42621. - Lock::DBLockSkipOptions lockOptions{/*.skipFlowControlTicket=*/false, - /*.skipRSTLLock=*/true}; - Lock::DBLock dbLock(abortCtx, replState->dbName, MODE_IX, Date_t::max(), lockOptions); - - const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID); - CollectionNamespaceOrUUIDLock collLock(abortCtx, dbAndUUID, MODE_X); - AutoGetCollection indexBuildEntryColl( - abortCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX); - _completeSelfAbort(abortCtx, replState, *indexBuildEntryColl, status); + // TODO (SERVER-76935): Remove collection lock timeout and abort state check loop. + // To avoid potential deadlocks with concurrent external aborts, which hold the + // collection MODE_X lock while waiting for this thread to signal its exit, the + // collection lock is acquired with a timeout, and retried only if the build is not + // already aborted (externally). + while (!replState->isAborted()) { + try { + ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict( + abortCtx->lockState()); + // Skip RSTL to avoid deadlocks with prepare conflicts and state transitions + // caused by taking a strong collection lock. See SERVER-42621. + Lock::DBLockSkipOptions lockOptions{/*.skipFlowControlTicket=*/false, + /*.skipRSTLLock=*/true}; + Lock::DBLock dbLock( + abortCtx, replState->dbName, MODE_IX, Date_t::max(), lockOptions); + + const NamespaceStringOrUUID dbAndUUID(replState->dbName, + replState->collectionUUID); + CollectionNamespaceOrUUIDLock collLock( + abortCtx, dbAndUUID, MODE_X, Date_t::now() + Milliseconds{100}); + AutoGetCollection indexBuildEntryColl( + abortCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX); + _completeSelfAbort(abortCtx, replState, *indexBuildEntryColl, status); + } catch (const ExceptionFor<ErrorCodes::LockTimeout>&) { + LOGV2(7677700, + "Unable to acquire collection lock within the timeout, a concurrent " + "abort might be waiting for the builder thread to exit. Rechecking if " + "self abort is still required.", + "buildUUID"_attr = replState->buildUUID); + continue; + } + } }); } @@ -2707,27 +2745,47 @@ void IndexBuildsCoordinator::_cleanUpTwoPhaseAfterNonShutdownFailure( ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict( abortCtx->lockState()); - // Take RSTL to observe and prevent replication state from changing. This is done - // with the release/reacquire strategy to avoid deadlock with prepared txns. - auto [dbLock, collLock, rstl] = std::move( - _acquireExclusiveLockWithRSTLRetry(abortCtx, replState.get()).getValue()); - - const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID); - auto replCoord = repl::ReplicationCoordinator::get(abortCtx); - if (!replCoord->canAcceptWritesFor(abortCtx, dbAndUUID)) { - // Index builds may not fail on secondaries. If a primary replicated - // an abortIndexBuild oplog entry, then this index build would have - // received an IndexBuildAborted error code. - fassert(51101, - status.withContext(str::stream() - << "Index build: " << replState->buildUUID - << "; Database: " - << replState->dbName.toStringForErrorMsg())); - } + // TODO (SERVER-76935): Remove collection lock timeout and abort state check loop. + // To avoid potential deadlocks with concurrent external aborts, which hold the + // collection MODE_X lock while waiting for this thread to signal its exit, the + // collection lock is acquired with a timeout, and retried only if the build is not + // already aborted (externally). + while (!replState->isAborted()) { + // Take RSTL to observe and prevent replication state from changing. This is + // done with the release/reacquire strategy to avoid deadlock with prepared + // txns. + auto swLocks = _acquireExclusiveLockWithRSTLRetry( + abortCtx, replState.get(), /*retry=*/true, /*collLockTimeout=*/true); + if (!swLocks.isOK()) { + LOGV2_DEBUG( + 7677701, + 1, + "Index build: lock acquisition for self-abort failed, will retry.", + "buildUUD"_attr = replState->buildUUID, + "error"_attr = swLocks.getStatus()); + continue; + } - AutoGetCollection indexBuildEntryColl( - abortCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX); - _completeSelfAbort(abortCtx, replState, *indexBuildEntryColl, status); + auto [dbLock, collLock, rstl] = std::move(swLocks.getValue()); + + const NamespaceStringOrUUID dbAndUUID(replState->dbName, + replState->collectionUUID); + auto replCoord = repl::ReplicationCoordinator::get(abortCtx); + if (!replCoord->canAcceptWritesFor(abortCtx, dbAndUUID)) { + // Index builds may not fail on secondaries. If a primary replicated + // an abortIndexBuild oplog entry, then this index build would have + // received an IndexBuildAborted error code. + fassert(51101, + status.withContext(str::stream() + << "Index build: " << replState->buildUUID + << "; Database: " + << replState->dbName.toStringForErrorMsg())); + } + + AutoGetCollection indexBuildEntryColl( + abortCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX); + _completeSelfAbort(abortCtx, replState, *indexBuildEntryColl, status); + } } }); } @@ -2791,14 +2849,25 @@ void IndexBuildsCoordinator::_runIndexBuildInner( // If the index build has already been cleaned-up because it encountered an error, there is no // work to do. If feature flag IndexBuildGracefulErrorHandling is not enabled, the most routine // case is for this to be due to a self-abort caused by constraint checking during the commit - // phase. When the flag is enabled, constraint violations cause the index build to abort - // immediately on primaries, and an async external abort is requested. + // phase. When the flag is enabled, constraint violations cause the index build to fail + // immediately, but is not yet set to aborted, so an external async abort will be requested + // later on. It is also possible the build was concurrently aborted, between the detection of + // the failure and this check here, in which case we exit early. if (replState->isAborted()) { if (ErrorCodes::isTenantMigrationError(replState->getAbortStatus())) uassertStatusOK(replState->getAbortStatus()); uassertStatusOK(status); } + // TODO (SERVER-76935): Remove collection lock timeout. + // It is also possible for the concurrent abort to happen after the check. This is an issue as + // external aborters hold the collection MODE_X lock while waiting for this thread to signal the + // promise, but if this thread proceeds beyond this check first it will try to acquire the + // collection lock before signaling the promise, potentially creating a deadlock. This is worked + // around by adding a timeout to the collection lock in the self-abort path, and rechecking if + // the build was aborted externally on timeout. + + // We do not hold a collection lock here, but we are protected against the collection being // dropped while the index build is still registered for the collection -- until abortIndexBuild // is called. The collection can be renamed, but it is OK for the name to be stale just for @@ -3134,6 +3203,16 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesBlockingWrites( const IndexBuildOptions& indexBuildOptions) { indexBuildsSSS.drainSideWritesTablePreCommit.addAndFetch(1); const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID); + + failIndexBuildWithErrorInSecondDrain.executeIf( + [](const BSONObj& data) { + uasserted(data["error"].safeNumberInt(), + "failIndexBuildWithErrorInSecondDrain failpoint triggered"); + }, + [&](const BSONObj& data) { + return UUID::parse(data["buildUUID"]) == replState->buildUUID; + }); + // Perform the second drain while stopping writes on the collection. { // Skip RSTL to avoid deadlocks with prepare conflicts and state transitions. See |