SERVER-71191 Unlock and relock RSTL during index build setup

(cherry picked from commit f1203e555321e326b0621479bcc6b607c5854391)
author: Gregory Noma <gregory.noma@gmail.com> 2022-11-11 19:33:14 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-11-16 16:22:28 +0000
commit: dc08ee3d6e52cbea9a643571e214b8943a7d60ed (patch)
tree: 77cb8805a76fc15b6d90608a959696bd6fceecb6
parent: 62616d131c0a574cc7fdaecbc54fa158d851c61c (diff)
download: mongo-dc08ee3d6e52cbea9a643571e214b8943a7d60ed.tar.gz
3 files changed, 45 insertions, 5 deletions
diff --git a/src/mongo/db/concurrency/replication_state_transition_lock_guard.cpp b/src/mongo/db/concurrency/replication_state_transition_lock_guard.cpp
index 394425e9a0d..63f6cde8c00 100644
--- a/src/mongo/db/concurrency/replication_state_transition_lock_guard.cpp
+++ b/src/mongo/db/concurrency/replication_state_transition_lock_guard.cpp
@@ -52,6 +52,12 @@ ReplicationStateTransitionLockGuard::ReplicationStateTransitionLockGuard(Operati
     _enqueueLock();
 }
 
+ReplicationStateTransitionLockGuard::ReplicationStateTransitionLockGuard(
+    ReplicationStateTransitionLockGuard&& other)
+    : _opCtx(other._opCtx), _mode(other._mode), _result(other._result) {
+    other._result = LockResult::LOCK_INVALID;  // Moved-from guard: its _unlock() becomes a no-op.
+}
+
 ReplicationStateTransitionLockGuard::~ReplicationStateTransitionLockGuard() {
     _unlock();
 }
@@ -83,6 +89,10 @@ void ReplicationStateTransitionLockGuard::_enqueueLock() {
 }
 
 void ReplicationStateTransitionLockGuard::_unlock() {
+    if (_result == LockResult::LOCK_INVALID) {
+        return;
+    }
+
     // If ReplicationStateTransitionLockGuard is called in a WriteUnitOfWork, we won't accept
     // any exceptions to be thrown between _enqueueLock and waitForLockUntil because that would
     // delay cleaning up any failed RSTL lock attempt state from lock manager.
diff --git a/src/mongo/db/concurrency/replication_state_transition_lock_guard.h b/src/mongo/db/concurrency/replication_state_transition_lock_guard.h
index 64107a74251..cf94dc77c28 100644
--- a/src/mongo/db/concurrency/replication_state_transition_lock_guard.h
+++ b/src/mongo/db/concurrency/replication_state_transition_lock_guard.h
@@ -63,8 +63,7 @@ public:
      */
     ReplicationStateTransitionLockGuard(OperationContext* opCtx, LockMode mode, EnqueueOnly);
 
-    ReplicationStateTransitionLockGuard(ReplicationStateTransitionLockGuard&&) = delete;
-    ReplicationStateTransitionLockGuard& operator=(ReplicationStateTransitionLockGuard&&) = delete;
+    ReplicationStateTransitionLockGuard(ReplicationStateTransitionLockGuard&&);
 
     ~ReplicationStateTransitionLockGuard();
 
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 39106fbf5e3..d8db6eec124 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -2001,10 +2001,41 @@ IndexBuildsCoordinator::PostSetupAction IndexBuildsCoordinator::_setUpIndexBuild
     std::shared_ptr<ReplIndexBuildState> replState,
     Timestamp startTimestamp,
     const IndexBuildOptions& indexBuildOptions) {
-    const NamespaceStringOrUUID nssOrUuid{replState->dbName, replState->collectionUUID};
+    auto [dbLock, collLock, rstl] = [&] {
+        while (true) {
+            Lock::DBLock dbLock{opCtx, {boost::none, replState->dbName}, MODE_IX};
 
-    AutoGetCollection coll(opCtx, nssOrUuid, MODE_X);
-    CollectionWriter collection(opCtx, coll);
+            // Unlock the RSTL to avoid deadlocks with prepared transactions and replication state
+            // transitions. See SERVER-71191.
+            unlockRSTL(opCtx);
+
+            Lock::CollectionLock collLock{
+                opCtx, {replState->dbName, replState->collectionUUID}, MODE_X};
+            repl::ReplicationStateTransitionLockGuard rstl{
+                opCtx, MODE_IX, repl::ReplicationStateTransitionLockGuard::EnqueueOnly{}};
+
+            try {
+                // Since this thread is not killable by state transitions, this deadline is
+                // effectively the longest period of time we can block a state transition. State
+                // transitions are infrequent, but need to happen quickly. It should be okay to set
+                // this to a low value because the RSTL is rarely contended and, if this does time
+                // out, we will retry and reacquire the RSTL again without a deadline.
+                rstl.waitForLockUntil(Date_t::now() + Milliseconds{10});
+            } catch (const ExceptionFor<ErrorCodes::LockTimeout>&) {
+                // We weren't able to re-acquire the RSTL within the timeout, which means there is
+                // an active state transition. Release our locks and try again from the beginning.
+                LOGV2(7119100,
+                      "Unable to acquire RSTL for index build setup within deadline, releasing "
+                      "locks and trying again",
+                      "buildUUID"_attr = replState->buildUUID);
+                continue;
+            }
+
+            return std::make_tuple(std::move(dbLock), std::move(collLock), std::move(rstl));
+        }
+    }();
+
+    CollectionWriter collection(opCtx, replState->collectionUUID);
     CollectionShardingState::get(opCtx, collection->ns())->checkShardVersionOrThrow(opCtx);
 
     auto replCoord = repl::ReplicationCoordinator::get(opCtx);
author	Gregory Noma <gregory.noma@gmail.com>	2022-11-11 19:33:14 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-11-16 16:22:28 +0000
commit	dc08ee3d6e52cbea9a643571e214b8943a7d60ed (patch)
tree	77cb8805a76fc15b6d90608a959696bd6fceecb6
parent	62616d131c0a574cc7fdaecbc54fa158d851c61c (diff)
download	mongo-dc08ee3d6e52cbea9a643571e214b8943a7d60ed.tar.gz