Revert "SERVER-59226 Fix deadlock between uninterruptible profiling operation and stepdown thread."

This reverts commit d1a9e68d581f8e3162bc790bdb30986b8d030d86.
author: Wenbin Zhu <wenbin.zhu@mongodb.com> 2021-11-01 20:37:20 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-11-01 20:51:44 +0000
commit: 8443b63b12e53d21590b1f7dcf02c5df26b9ff40 (patch)
tree: 93eaa88ddf26d7cc996f03654ff2d2095bdbec2c
parent: d1a9e68d581f8e3162bc790bdb30986b8d030d86 (diff)
download: mongo-8443b63b12e53d21590b1f7dcf02c5df26b9ff40.tar.gz
3 files changed, 9 insertions, 47 deletions
diff --git a/src/mongo/db/introspect.cpp b/src/mongo/db/introspect.cpp
index 78ae238a239..8bda2543426 100644
--- a/src/mongo/db/introspect.cpp
+++ b/src/mongo/db/introspect.cpp
@@ -50,9 +50,9 @@
 
 namespace mongo {
 
+using std::unique_ptr;
 using std::endl;
 using std::string;
-using std::unique_ptr;
 
 namespace {
 
@@ -117,32 +117,20 @@ void profile(OperationContext* opCtx, NetworkOp op) {
 
     const string dbName(nsToDatabase(CurOp::get(opCtx)->getNS()));
 
-    // True if we need to acquire an X lock on the database in order to create the system.profile
-    // collection.
-    bool acquireDbXLock = false;
-
     try {
-        // Set the opCtx to be only interruptible for replication state changes. This is needed
-        // because for some operations that are already marked as killed due to errors such as
-        // operation time exceeding maxTimeMS, we still want to output the profiler entry. Thus
-        // in these cases we do not interrupt lock acquisition even though the opCtx has already
-        // been killed. In the meantime we need to make sure replication state changes can still
-        // interrupt lock acquisition, otherwise there could be deadlocks when the state change
-        // thread is waiting for the session checked out by this opCtx while holding RSTL lock.
-        // However when maxLockTimeout is set, we want it to be always interruptible.
+        // Even if the operation we are profiling was interrupted, we still want to output the
+        // profiler entry.  This lock guard will prevent lock acquisitions from throwing exceptions
+        // before we finish writing the entry. However, our maximum lock timeout overrides
+        // uninterruptibility.
+        boost::optional<UninterruptibleLockGuard> noInterrupt;
         if (!opCtx->lockState()->hasMaxLockTimeout()) {
-            opCtx->setIgnoreInterruptsExceptForReplStateChange(true);
+            noInterrupt.emplace(opCtx->lockState());
         }
 
-        // IX lock acquisitions beyond this block will not be related to writes to system.profile.
-        ON_BLOCK_EXIT([opCtx] { opCtx->setIgnoreInterruptsExceptForReplStateChange(false); });
-
+        bool acquireDbXLock = false;
         while (true) {
             std::unique_ptr<AutoGetDb> autoGetDb;
             if (acquireDbXLock) {
-                // We should not attempt to acquire an X lock while opCtx ignores interrupts.
-                opCtx->setIgnoreInterruptsExceptForReplStateChange(false);
-
                 autoGetDb.reset(new AutoGetDb(opCtx, dbName, MODE_X));
                 if (autoGetDb->getDb()) {
                     createProfileCollection(opCtx, autoGetDb->getDb()).transitional_ignore();
diff --git a/src/mongo/db/operation_context.cpp b/src/mongo/db/operation_context.cpp
index 3775b73a82f..02a3b1226d5 100644
--- a/src/mongo/db/operation_context.cpp
+++ b/src/mongo/db/operation_context.cpp
@@ -196,14 +196,6 @@ bool opShouldFail(Client* client, const BSONObj& failPointInfo) {
 }  // namespace
 
 Status OperationContext::checkForInterruptNoAssert() {
-    const auto killStatus = getKillStatus();
-
-    if (_ignoreInterruptsExceptForReplStateChange &&
-        killStatus != ErrorCodes::InterruptedDueToReplStateChange &&
-        !_killRequestedForReplStateChange.loadRelaxed()) {
-        return Status::OK();
-    }
-
     // TODO: Remove the MONGO_likely(getClient()) once all operation contexts are constructed with
     // clients.
     if (MONGO_likely(getClient() && getServiceContext()) &&
@@ -223,6 +215,7 @@ Status OperationContext::checkForInterruptNoAssert() {
         }
     }
 
+    const auto killStatus = getKillStatus();
     if (killStatus != ErrorCodes::OK) {
         return Status(killStatus, "operation was interrupted");
     }
@@ -368,14 +361,6 @@ void OperationContext::markKilled(ErrorCodes::Error killCode) {
         });
         lkWaitMutex = stdx::unique_lock<stdx::mutex>{*_waitMutex};
     }
-
-    // Record that a kill was requested on this operationContext due to replication state change
-    // since it is possible to call markKilled() multiple times but only the first killCode will
-    // be preserved.
-    if (killCode == ErrorCodes::InterruptedDueToReplStateChange) {
-        _killRequestedForReplStateChange.store(true);
-    }
-
     _killCode.compareAndSwap(ErrorCodes::OK, killCode);
     if (lkWaitMutex && _numKillers == 0) {
         invariant(_waitCV);
diff --git a/src/mongo/db/operation_context.h b/src/mongo/db/operation_context.h
index a2eacc1e700..4a9bc116da4 100644
--- a/src/mongo/db/operation_context.h
+++ b/src/mongo/db/operation_context.h
@@ -432,14 +432,6 @@ public:
      */
     Microseconds getRemainingMaxTimeMicros() const;
 
-    /**
-     * Sets that this operation should ignore interruption except for replication state change. Can
-     * only be called by the thread executing this on behalf of this OperationContext.
-     */
-    void setIgnoreInterruptsExceptForReplStateChange(bool target) {
-        _ignoreInterruptsExceptForReplStateChange = target;
-    }
-
 private:
     /**
      * Returns true if this operation has a deadline and it has passed according to the fast clock
@@ -529,9 +521,6 @@ private:
     Timer _elapsedTime;
 
     bool _writesAreReplicated = true;
-
-    bool _ignoreInterruptsExceptForReplStateChange = false;
-    AtomicWord<bool> _killRequestedForReplStateChange{false};
 };
 
 namespace repl {
author	Wenbin Zhu <wenbin.zhu@mongodb.com>	2021-11-01 20:37:20 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2021-11-01 20:51:44 +0000
commit	8443b63b12e53d21590b1f7dcf02c5df26b9ff40 (patch)
tree	93eaa88ddf26d7cc996f03654ff2d2095bdbec2c
parent	d1a9e68d581f8e3162bc790bdb30986b8d030d86 (diff)
download	mongo-8443b63b12e53d21590b1f7dcf02c5df26b9ff40.tar.gz