diff options
author | Wenbin Zhu <wenbin.zhu@mongodb.com> | 2021-11-01 20:37:20 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-11-01 20:51:44 +0000 |
commit | 8443b63b12e53d21590b1f7dcf02c5df26b9ff40 (patch) | |
tree | 93eaa88ddf26d7cc996f03654ff2d2095bdbec2c | |
parent | d1a9e68d581f8e3162bc790bdb30986b8d030d86 (diff) | |
download | mongo-8443b63b12e53d21590b1f7dcf02c5df26b9ff40.tar.gz |
Revert "SERVER-59226 Fix deadlock between uninterruptible profiling operation and stepdown thread."
This reverts commit d1a9e68d581f8e3162bc790bdb30986b8d030d86.
-rw-r--r-- | src/mongo/db/introspect.cpp | 28 | ||||
-rw-r--r-- | src/mongo/db/operation_context.cpp | 17 | ||||
-rw-r--r-- | src/mongo/db/operation_context.h | 11 |
3 files changed, 9 insertions, 47 deletions
diff --git a/src/mongo/db/introspect.cpp b/src/mongo/db/introspect.cpp index 78ae238a239..8bda2543426 100644 --- a/src/mongo/db/introspect.cpp +++ b/src/mongo/db/introspect.cpp @@ -50,9 +50,9 @@ namespace mongo { +using std::unique_ptr; using std::endl; using std::string; -using std::unique_ptr; namespace { @@ -117,32 +117,20 @@ void profile(OperationContext* opCtx, NetworkOp op) { const string dbName(nsToDatabase(CurOp::get(opCtx)->getNS())); - // True if we need to acquire an X lock on the database in order to create the system.profile - // collection. - bool acquireDbXLock = false; - try { - // Set the opCtx to be only interruptible for replication state changes. This is needed - // because for some operations that are already marked as killed due to errors such as - // operation time exceeding maxTimeMS, we still want to output the profiler entry. Thus - // in these cases we do not interrupt lock acquisition even though the opCtx has already - // been killed. In the meantime we need to make sure replication state changes can still - // interrupt lock acquisition, otherwise there could be deadlocks when the state change - // thread is waiting for the session checked out by this opCtx while holding RSTL lock. - // However when maxLockTimeout is set, we want it to be always interruptible. + // Even if the operation we are profiling was interrupted, we still want to output the + // profiler entry. This lock guard will prevent lock acquisitions from throwing exceptions + // before we finish writing the entry. However, our maximum lock timeout overrides + // uninterruptibility. + boost::optional<UninterruptibleLockGuard> noInterrupt; if (!opCtx->lockState()->hasMaxLockTimeout()) { - opCtx->setIgnoreInterruptsExceptForReplStateChange(true); + noInterrupt.emplace(opCtx->lockState()); } - // IX lock acquisitions beyond this block will not be related to writes to system.profile. - ON_BLOCK_EXIT([opCtx] { opCtx->setIgnoreInterruptsExceptForReplStateChange(false); }); - + bool acquireDbXLock = false; while (true) { std::unique_ptr<AutoGetDb> autoGetDb; if (acquireDbXLock) { - // We should not attempt to acquire an X lock while opCtx ignores interrupts. - opCtx->setIgnoreInterruptsExceptForReplStateChange(false); - autoGetDb.reset(new AutoGetDb(opCtx, dbName, MODE_X)); if (autoGetDb->getDb()) { createProfileCollection(opCtx, autoGetDb->getDb()).transitional_ignore(); diff --git a/src/mongo/db/operation_context.cpp b/src/mongo/db/operation_context.cpp index 3775b73a82f..02a3b1226d5 100644 --- a/src/mongo/db/operation_context.cpp +++ b/src/mongo/db/operation_context.cpp @@ -196,14 +196,6 @@ bool opShouldFail(Client* client, const BSONObj& failPointInfo) { } // namespace Status OperationContext::checkForInterruptNoAssert() { - const auto killStatus = getKillStatus(); - - if (_ignoreInterruptsExceptForReplStateChange && - killStatus != ErrorCodes::InterruptedDueToReplStateChange && - !_killRequestedForReplStateChange.loadRelaxed()) { - return Status::OK(); - } - // TODO: Remove the MONGO_likely(getClient()) once all operation contexts are constructed with // clients. if (MONGO_likely(getClient() && getServiceContext()) && @@ -223,6 +215,7 @@ Status OperationContext::checkForInterruptNoAssert() { } } + const auto killStatus = getKillStatus(); if (killStatus != ErrorCodes::OK) { return Status(killStatus, "operation was interrupted"); } @@ -368,14 +361,6 @@ void OperationContext::markKilled(ErrorCodes::Error killCode) { }); lkWaitMutex = stdx::unique_lock<stdx::mutex>{*_waitMutex}; } - - // Record that a kill was requested on this operationContext due to replication state change - // since it is possible to call markKilled() multiple times but only the first killCode will - // be preserved. - if (killCode == ErrorCodes::InterruptedDueToReplStateChange) { - _killRequestedForReplStateChange.store(true); - } - _killCode.compareAndSwap(ErrorCodes::OK, killCode); if (lkWaitMutex && _numKillers == 0) { invariant(_waitCV); diff --git a/src/mongo/db/operation_context.h b/src/mongo/db/operation_context.h index a2eacc1e700..4a9bc116da4 100644 --- a/src/mongo/db/operation_context.h +++ b/src/mongo/db/operation_context.h @@ -432,14 +432,6 @@ public: */ Microseconds getRemainingMaxTimeMicros() const; - /** - * Sets that this operation should ignore interruption except for replication state change. Can - * only be called by the thread executing this on behalf of this OperationContext. - */ - void setIgnoreInterruptsExceptForReplStateChange(bool target) { - _ignoreInterruptsExceptForReplStateChange = target; - } - private: /** * Returns true if this operation has a deadline and it has passed according to the fast clock @@ -529,9 +521,6 @@ private: Timer _elapsedTime; bool _writesAreReplicated = true; - - bool _ignoreInterruptsExceptForReplStateChange = false; - AtomicWord<bool> _killRequestedForReplStateChange{false}; }; namespace repl { |