1 files changed, 117 insertions, 38 deletions
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index de5004df03d..17fe7380a5d 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -94,7 +94,10 @@ MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeWaitingUntilMajorityOpTime);
 MONGO_FAIL_POINT_DEFINE(hangBeforeUnregisteringAfterCommit);
 MONGO_FAIL_POINT_DEFINE(failSetUpResumeIndexBuild);
 MONGO_FAIL_POINT_DEFINE(failIndexBuildWithError);
+MONGO_FAIL_POINT_DEFINE(failIndexBuildWithErrorInSecondDrain);
 MONGO_FAIL_POINT_DEFINE(hangInRemoveIndexBuildEntryAfterCommitOrAbort);
+MONGO_FAIL_POINT_DEFINE(hangAbortIndexBuildByBuildUUIDAfterLocks);
+
 
 IndexBuildsCoordinator::IndexBuildsSSS::IndexBuildsSSS()
     : ServerStatusSection("indexBuilds"),
@@ -1409,6 +1412,8 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx,
         AutoGetCollection indexBuildEntryColl(
             opCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX);
 
+        hangAbortIndexBuildByBuildUUIDAfterLocks.pauseWhileSet();
+
         // If we are using two-phase index builds and are no longer primary after receiving an
         // abort, we cannot replicate an abortIndexBuild oplog entry. Continue holding the RSTL to
         // check the replication state and to prevent any state transitions from happening while
@@ -2157,7 +2162,8 @@ StatusWith<std::tuple<Lock::DBLock,
                       repl::ReplicationStateTransitionLockGuard>>
 IndexBuildsCoordinator::_acquireExclusiveLockWithRSTLRetry(OperationContext* opCtx,
                                                            ReplIndexBuildState* replState,
-                                                           bool retry) {
+                                                           bool retry,
+                                                           bool collLockTimeout) {
 
     while (true) {
         // Skip the check for sharding's critical section check as it can only be acquired during a
@@ -2168,8 +2174,20 @@ IndexBuildsCoordinator::_acquireExclusiveLockWithRSTLRetry(OperationContext* opC
                                             /*.skipRSTLLock=*/true};
         Lock::DBLock dbLock{opCtx, replState->dbName, MODE_IX, Date_t::max(), lockOptions};
 
-        CollectionNamespaceOrUUIDLock collLock{
-            opCtx, {replState->dbName, replState->collectionUUID}, MODE_X};
+        Date_t collLockDeadline = Date_t::max();
+        if (collLockTimeout) {
+            collLockDeadline = Date_t::now() + Milliseconds{100};
+        }
+
+        boost::optional<CollectionNamespaceOrUUIDLock> collLock;
+        try {
+            collLock.emplace(opCtx,
+                             NamespaceStringOrUUID{replState->dbName, replState->collectionUUID},
+                             MODE_X,
+                             collLockDeadline);
+        } catch (const ExceptionFor<ErrorCodes::LockTimeout>& ex) {
+            return ex.toStatus();
+        }
 
         // If we can't acquire the RSTL within a given time period, there is an active state
         // transition and we should release our locks and try again. We would otherwise introduce a
@@ -2200,7 +2218,7 @@ IndexBuildsCoordinator::_acquireExclusiveLockWithRSTLRetry(OperationContext* opC
             continue;
         }
 
-        return std::make_tuple(std::move(dbLock), std::move(collLock), std::move(rstl));
+        return std::make_tuple(std::move(dbLock), std::move(collLock.value()), std::move(rstl));
     }
 }
 
@@ -2648,18 +2666,38 @@ void IndexBuildsCoordinator::_cleanUpSinglePhaseAfterNonShutdownFailure(
     // would prevent us from taking locks. Use a new OperationContext to abort the index build.
     runOnAlternateContext(
         opCtx, "self-abort", [this, replState, status](OperationContext* abortCtx) {
-            ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(abortCtx->lockState());
-            // Skip RSTL to avoid deadlocks with prepare conflicts and state transitions caused by
-            // taking a strong collection lock. See SERVER-42621.
-            Lock::DBLockSkipOptions lockOptions{/*.skipFlowControlTicket=*/false,
-                                                /*.skipRSTLLock=*/true};
-            Lock::DBLock dbLock(abortCtx, replState->dbName, MODE_IX, Date_t::max(), lockOptions);
-
-            const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
-            CollectionNamespaceOrUUIDLock collLock(abortCtx, dbAndUUID, MODE_X);
-            AutoGetCollection indexBuildEntryColl(
-                abortCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX);
-            _completeSelfAbort(abortCtx, replState, *indexBuildEntryColl, status);
+            // TODO (SERVER-76935): Remove collection lock timeout and abort state check loop.
+            // To avoid potential deadlocks with concurrent external aborts, which hold the
+            // collection MODE_X lock while waiting for this thread to signal its exit, the
+            // collection lock is acquired with a timeout, and retried only if the build is not
+            // already aborted (externally).
+            while (!replState->isAborted()) {
+                try {
+                    ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(
+                        abortCtx->lockState());
+                    // Skip RSTL to avoid deadlocks with prepare conflicts and state transitions
+                    // caused by taking a strong collection lock. See SERVER-42621.
+                    Lock::DBLockSkipOptions lockOptions{/*.skipFlowControlTicket=*/false,
+                                                        /*.skipRSTLLock=*/true};
+                    Lock::DBLock dbLock(
+                        abortCtx, replState->dbName, MODE_IX, Date_t::max(), lockOptions);
+
+                    const NamespaceStringOrUUID dbAndUUID(replState->dbName,
+                                                          replState->collectionUUID);
+                    CollectionNamespaceOrUUIDLock collLock(
+                        abortCtx, dbAndUUID, MODE_X, Date_t::now() + Milliseconds{100});
+                    AutoGetCollection indexBuildEntryColl(
+                        abortCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX);
+                    _completeSelfAbort(abortCtx, replState, *indexBuildEntryColl, status);
+                } catch (const ExceptionFor<ErrorCodes::LockTimeout>&) {
+                    LOGV2(7677700,
+                          "Unable to acquire collection lock within the timeout, a concurrent "
+                          "abort might be waiting for the builder thread to exit. Rechecking if "
+                          "self abort is still required.",
+                          "buildUUID"_attr = replState->buildUUID);
+                    continue;
+                }
+            }
         });
 }
 
@@ -2707,27 +2745,47 @@ void IndexBuildsCoordinator::_cleanUpTwoPhaseAfterNonShutdownFailure(
                 ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(
                     abortCtx->lockState());
 
-                // Take RSTL to observe and prevent replication state from changing. This is done
-                // with the release/reacquire strategy to avoid deadlock with prepared txns.
-                auto [dbLock, collLock, rstl] = std::move(
-                    _acquireExclusiveLockWithRSTLRetry(abortCtx, replState.get()).getValue());
-
-                const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
-                auto replCoord = repl::ReplicationCoordinator::get(abortCtx);
-                if (!replCoord->canAcceptWritesFor(abortCtx, dbAndUUID)) {
-                    // Index builds may not fail on secondaries. If a primary replicated
-                    // an abortIndexBuild oplog entry, then this index build would have
-                    // received an IndexBuildAborted error code.
-                    fassert(51101,
-                            status.withContext(str::stream()
-                                               << "Index build: " << replState->buildUUID
-                                               << "; Database: "
-                                               << replState->dbName.toStringForErrorMsg()));
-                }
+                // TODO (SERVER-76935): Remove collection lock timeout and abort state check loop.
+                // To avoid potential deadlocks with concurrent external aborts, which hold the
+                // collection MODE_X lock while waiting for this thread to signal its exit, the
+                // collection lock is acquired with a timeout, and retried only if the build is not
+                // already aborted (externally).
+                while (!replState->isAborted()) {
+                    // Take RSTL to observe and prevent replication state from changing. This is
+                    // done with the release/reacquire strategy to avoid deadlock with prepared
+                    // txns.
+                    auto swLocks = _acquireExclusiveLockWithRSTLRetry(
+                        abortCtx, replState.get(), /*retry=*/true, /*collLockTimeout=*/true);
+                    if (!swLocks.isOK()) {
+                        LOGV2_DEBUG(
+                            7677701,
+                            1,
+                            "Index build: lock acquisition for self-abort failed, will retry.",
+                            "buildUUD"_attr = replState->buildUUID,
+                            "error"_attr = swLocks.getStatus());
+                        continue;
+                    }
 
-                AutoGetCollection indexBuildEntryColl(
-                    abortCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX);
-                _completeSelfAbort(abortCtx, replState, *indexBuildEntryColl, status);
+                    auto [dbLock, collLock, rstl] = std::move(swLocks.getValue());
+
+                    const NamespaceStringOrUUID dbAndUUID(replState->dbName,
+                                                          replState->collectionUUID);
+                    auto replCoord = repl::ReplicationCoordinator::get(abortCtx);
+                    if (!replCoord->canAcceptWritesFor(abortCtx, dbAndUUID)) {
+                        // Index builds may not fail on secondaries. If a primary replicated
+                        // an abortIndexBuild oplog entry, then this index build would have
+                        // received an IndexBuildAborted error code.
+                        fassert(51101,
+                                status.withContext(str::stream()
+                                                   << "Index build: " << replState->buildUUID
+                                                   << "; Database: "
+                                                   << replState->dbName.toStringForErrorMsg()));
+                    }
+
+                    AutoGetCollection indexBuildEntryColl(
+                        abortCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX);
+                    _completeSelfAbort(abortCtx, replState, *indexBuildEntryColl, status);
+                }
             }
         });
 }
@@ -2791,14 +2849,25 @@ void IndexBuildsCoordinator::_runIndexBuildInner(
     // If the index build has already been cleaned-up because it encountered an error, there is no
     // work to do. If feature flag IndexBuildGracefulErrorHandling is not enabled, the most routine
     // case is for this to be due to a self-abort caused by constraint checking during the commit
-    // phase. When the flag is enabled, constraint violations cause the index build to abort
-    // immediately on primaries, and an async external abort is requested.
+    // phase. When the flag is enabled, constraint violations cause the index build to fail
+    // immediately, but is not yet set to aborted, so an external async abort will be requested
+    // later on. It is also possible the build was concurrently aborted, between the detection of
+    // the failure and this check here, in which case we exit early.
     if (replState->isAborted()) {
         if (ErrorCodes::isTenantMigrationError(replState->getAbortStatus()))
             uassertStatusOK(replState->getAbortStatus());
         uassertStatusOK(status);
     }
 
+    // TODO (SERVER-76935): Remove collection lock timeout.
+    // It is also possible for the concurrent abort to happen after the check. This is an issue as
+    // external aborters hold the collection MODE_X lock while waiting for this thread to signal the
+    // promise, but if this thread proceeds beyond this check first it will try to acquire the
+    // collection lock before signaling the promise, potentially creating a deadlock. This is worked
+    // around by adding a timeout to the collection lock in the self-abort path, and rechecking if
+    // the build was aborted externally on timeout.
+
+
     // We do not hold a collection lock here, but we are protected against the collection being
     // dropped while the index build is still registered for the collection -- until abortIndexBuild
     // is called. The collection can be renamed, but it is OK for the name to be stale just for
@@ -3134,6 +3203,16 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesBlockingWrites(
     const IndexBuildOptions& indexBuildOptions) {
     indexBuildsSSS.drainSideWritesTablePreCommit.addAndFetch(1);
     const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
+
+    failIndexBuildWithErrorInSecondDrain.executeIf(
+        [](const BSONObj& data) {
+            uasserted(data["error"].safeNumberInt(),
+                      "failIndexBuildWithErrorInSecondDrain failpoint triggered");
+        },
+        [&](const BSONObj& data) {
+            return UUID::parse(data["buildUUID"]) == replState->buildUUID;
+        });
+
     // Perform the second drain while stopping writes on the collection.
     {
         // Skip RSTL to avoid deadlocks with prepare conflicts and state transitions. See