summaryrefslogtreecommitdiff
path: root/src/mongo/db/index_builds_coordinator.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/index_builds_coordinator.cpp')
-rw-r--r--src/mongo/db/index_builds_coordinator.cpp155
1 files changed, 117 insertions, 38 deletions
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index de5004df03d..17fe7380a5d 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -94,7 +94,10 @@ MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeWaitingUntilMajorityOpTime);
MONGO_FAIL_POINT_DEFINE(hangBeforeUnregisteringAfterCommit);
MONGO_FAIL_POINT_DEFINE(failSetUpResumeIndexBuild);
MONGO_FAIL_POINT_DEFINE(failIndexBuildWithError);
+MONGO_FAIL_POINT_DEFINE(failIndexBuildWithErrorInSecondDrain);
MONGO_FAIL_POINT_DEFINE(hangInRemoveIndexBuildEntryAfterCommitOrAbort);
+MONGO_FAIL_POINT_DEFINE(hangAbortIndexBuildByBuildUUIDAfterLocks);
+
IndexBuildsCoordinator::IndexBuildsSSS::IndexBuildsSSS()
: ServerStatusSection("indexBuilds"),
@@ -1409,6 +1412,8 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx,
AutoGetCollection indexBuildEntryColl(
opCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX);
+ hangAbortIndexBuildByBuildUUIDAfterLocks.pauseWhileSet();
+
// If we are using two-phase index builds and are no longer primary after receiving an
// abort, we cannot replicate an abortIndexBuild oplog entry. Continue holding the RSTL to
// check the replication state and to prevent any state transitions from happening while
@@ -2157,7 +2162,8 @@ StatusWith<std::tuple<Lock::DBLock,
repl::ReplicationStateTransitionLockGuard>>
IndexBuildsCoordinator::_acquireExclusiveLockWithRSTLRetry(OperationContext* opCtx,
ReplIndexBuildState* replState,
- bool retry) {
+ bool retry,
+ bool collLockTimeout) {
while (true) {
// Skip the check for sharding's critical section check as it can only be acquired during a
@@ -2168,8 +2174,20 @@ IndexBuildsCoordinator::_acquireExclusiveLockWithRSTLRetry(OperationContext* opC
/*.skipRSTLLock=*/true};
Lock::DBLock dbLock{opCtx, replState->dbName, MODE_IX, Date_t::max(), lockOptions};
- CollectionNamespaceOrUUIDLock collLock{
- opCtx, {replState->dbName, replState->collectionUUID}, MODE_X};
+ Date_t collLockDeadline = Date_t::max();
+ if (collLockTimeout) {
+ collLockDeadline = Date_t::now() + Milliseconds{100};
+ }
+
+ boost::optional<CollectionNamespaceOrUUIDLock> collLock;
+ try {
+ collLock.emplace(opCtx,
+ NamespaceStringOrUUID{replState->dbName, replState->collectionUUID},
+ MODE_X,
+ collLockDeadline);
+ } catch (const ExceptionFor<ErrorCodes::LockTimeout>& ex) {
+ return ex.toStatus();
+ }
// If we can't acquire the RSTL within a given time period, there is an active state
// transition and we should release our locks and try again. We would otherwise introduce a
@@ -2200,7 +2218,7 @@ IndexBuildsCoordinator::_acquireExclusiveLockWithRSTLRetry(OperationContext* opC
continue;
}
- return std::make_tuple(std::move(dbLock), std::move(collLock), std::move(rstl));
+ return std::make_tuple(std::move(dbLock), std::move(collLock.value()), std::move(rstl));
}
}
@@ -2648,18 +2666,38 @@ void IndexBuildsCoordinator::_cleanUpSinglePhaseAfterNonShutdownFailure(
// would prevent us from taking locks. Use a new OperationContext to abort the index build.
runOnAlternateContext(
opCtx, "self-abort", [this, replState, status](OperationContext* abortCtx) {
- ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(abortCtx->lockState());
- // Skip RSTL to avoid deadlocks with prepare conflicts and state transitions caused by
- // taking a strong collection lock. See SERVER-42621.
- Lock::DBLockSkipOptions lockOptions{/*.skipFlowControlTicket=*/false,
- /*.skipRSTLLock=*/true};
- Lock::DBLock dbLock(abortCtx, replState->dbName, MODE_IX, Date_t::max(), lockOptions);
-
- const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
- CollectionNamespaceOrUUIDLock collLock(abortCtx, dbAndUUID, MODE_X);
- AutoGetCollection indexBuildEntryColl(
- abortCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX);
- _completeSelfAbort(abortCtx, replState, *indexBuildEntryColl, status);
+ // TODO (SERVER-76935): Remove collection lock timeout and abort state check loop.
+ // To avoid potential deadlocks with concurrent external aborts, which hold the
+ // collection MODE_X lock while waiting for this thread to signal its exit, the
+ // collection lock is acquired with a timeout, and retried only if the build is not
+ // already aborted (externally).
+ while (!replState->isAborted()) {
+ try {
+ ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(
+ abortCtx->lockState());
+ // Skip RSTL to avoid deadlocks with prepare conflicts and state transitions
+ // caused by taking a strong collection lock. See SERVER-42621.
+ Lock::DBLockSkipOptions lockOptions{/*.skipFlowControlTicket=*/false,
+ /*.skipRSTLLock=*/true};
+ Lock::DBLock dbLock(
+ abortCtx, replState->dbName, MODE_IX, Date_t::max(), lockOptions);
+
+ const NamespaceStringOrUUID dbAndUUID(replState->dbName,
+ replState->collectionUUID);
+ CollectionNamespaceOrUUIDLock collLock(
+ abortCtx, dbAndUUID, MODE_X, Date_t::now() + Milliseconds{100});
+ AutoGetCollection indexBuildEntryColl(
+ abortCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX);
+ _completeSelfAbort(abortCtx, replState, *indexBuildEntryColl, status);
+ } catch (const ExceptionFor<ErrorCodes::LockTimeout>&) {
+ LOGV2(7677700,
+ "Unable to acquire collection lock within the timeout, a concurrent "
+ "abort might be waiting for the builder thread to exit. Rechecking if "
+ "self abort is still required.",
+ "buildUUID"_attr = replState->buildUUID);
+ continue;
+ }
+ }
});
}
@@ -2707,27 +2745,47 @@ void IndexBuildsCoordinator::_cleanUpTwoPhaseAfterNonShutdownFailure(
ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(
abortCtx->lockState());
- // Take RSTL to observe and prevent replication state from changing. This is done
- // with the release/reacquire strategy to avoid deadlock with prepared txns.
- auto [dbLock, collLock, rstl] = std::move(
- _acquireExclusiveLockWithRSTLRetry(abortCtx, replState.get()).getValue());
-
- const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
- auto replCoord = repl::ReplicationCoordinator::get(abortCtx);
- if (!replCoord->canAcceptWritesFor(abortCtx, dbAndUUID)) {
- // Index builds may not fail on secondaries. If a primary replicated
- // an abortIndexBuild oplog entry, then this index build would have
- // received an IndexBuildAborted error code.
- fassert(51101,
- status.withContext(str::stream()
- << "Index build: " << replState->buildUUID
- << "; Database: "
- << replState->dbName.toStringForErrorMsg()));
- }
+ // TODO (SERVER-76935): Remove collection lock timeout and abort state check loop.
+ // To avoid potential deadlocks with concurrent external aborts, which hold the
+ // collection MODE_X lock while waiting for this thread to signal its exit, the
+ // collection lock is acquired with a timeout, and retried only if the build is not
+ // already aborted (externally).
+ while (!replState->isAborted()) {
+ // Take RSTL to observe and prevent replication state from changing. This is
+ // done with the release/reacquire strategy to avoid deadlock with prepared
+ // txns.
+ auto swLocks = _acquireExclusiveLockWithRSTLRetry(
+ abortCtx, replState.get(), /*retry=*/true, /*collLockTimeout=*/true);
+ if (!swLocks.isOK()) {
+ LOGV2_DEBUG(
+ 7677701,
+ 1,
+ "Index build: lock acquisition for self-abort failed, will retry.",
+ "buildUUD"_attr = replState->buildUUID,
+ "error"_attr = swLocks.getStatus());
+ continue;
+ }
- AutoGetCollection indexBuildEntryColl(
- abortCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX);
- _completeSelfAbort(abortCtx, replState, *indexBuildEntryColl, status);
+ auto [dbLock, collLock, rstl] = std::move(swLocks.getValue());
+
+ const NamespaceStringOrUUID dbAndUUID(replState->dbName,
+ replState->collectionUUID);
+ auto replCoord = repl::ReplicationCoordinator::get(abortCtx);
+ if (!replCoord->canAcceptWritesFor(abortCtx, dbAndUUID)) {
+ // Index builds may not fail on secondaries. If a primary replicated
+ // an abortIndexBuild oplog entry, then this index build would have
+ // received an IndexBuildAborted error code.
+ fassert(51101,
+ status.withContext(str::stream()
+ << "Index build: " << replState->buildUUID
+ << "; Database: "
+ << replState->dbName.toStringForErrorMsg()));
+ }
+
+ AutoGetCollection indexBuildEntryColl(
+ abortCtx, NamespaceString::kIndexBuildEntryNamespace, MODE_IX);
+ _completeSelfAbort(abortCtx, replState, *indexBuildEntryColl, status);
+ }
}
});
}
@@ -2791,14 +2849,25 @@ void IndexBuildsCoordinator::_runIndexBuildInner(
// If the index build has already been cleaned-up because it encountered an error, there is no
// work to do. If feature flag IndexBuildGracefulErrorHandling is not enabled, the most routine
// case is for this to be due to a self-abort caused by constraint checking during the commit
- // phase. When the flag is enabled, constraint violations cause the index build to abort
- // immediately on primaries, and an async external abort is requested.
+ // phase. When the flag is enabled, constraint violations cause the index build to fail
+ // immediately, but is not yet set to aborted, so an external async abort will be requested
+ // later on. It is also possible the build was concurrently aborted, between the detection of
+ // the failure and this check here, in which case we exit early.
if (replState->isAborted()) {
if (ErrorCodes::isTenantMigrationError(replState->getAbortStatus()))
uassertStatusOK(replState->getAbortStatus());
uassertStatusOK(status);
}
+ // TODO (SERVER-76935): Remove collection lock timeout.
+ // It is also possible for the concurrent abort to happen after the check. This is an issue as
+ // external aborters hold the collection MODE_X lock while waiting for this thread to signal the
+ // promise, but if this thread proceeds beyond this check first it will try to acquire the
+ // collection lock before signaling the promise, potentially creating a deadlock. This is worked
+ // around by adding a timeout to the collection lock in the self-abort path, and rechecking if
+ // the build was aborted externally on timeout.
+
+
// We do not hold a collection lock here, but we are protected against the collection being
// dropped while the index build is still registered for the collection -- until abortIndexBuild
// is called. The collection can be renamed, but it is OK for the name to be stale just for
@@ -3134,6 +3203,16 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesBlockingWrites(
const IndexBuildOptions& indexBuildOptions) {
indexBuildsSSS.drainSideWritesTablePreCommit.addAndFetch(1);
const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
+
+ failIndexBuildWithErrorInSecondDrain.executeIf(
+ [](const BSONObj& data) {
+ uasserted(data["error"].safeNumberInt(),
+ "failIndexBuildWithErrorInSecondDrain failpoint triggered");
+ },
+ [&](const BSONObj& data) {
+ return UUID::parse(data["buildUUID"]) == replState->buildUUID;
+ });
+
// Perform the second drain while stopping writes on the collection.
{
// Skip RSTL to avoid deadlocks with prepare conflicts and state transitions. See