diff options
-rw-r--r-- | src/mongo/db/introspect.cpp | 28 | ||||
-rw-r--r-- | src/mongo/db/operation_context.cpp | 17 | ||||
-rw-r--r-- | src/mongo/db/operation_context.h | 11 |
3 files changed, 47 insertions, 9 deletions
diff --git a/src/mongo/db/introspect.cpp b/src/mongo/db/introspect.cpp index 8bda2543426..78ae238a239 100644 --- a/src/mongo/db/introspect.cpp +++ b/src/mongo/db/introspect.cpp @@ -50,9 +50,9 @@ namespace mongo { -using std::unique_ptr; using std::endl; using std::string; +using std::unique_ptr; namespace { @@ -117,20 +117,32 @@ void profile(OperationContext* opCtx, NetworkOp op) { const string dbName(nsToDatabase(CurOp::get(opCtx)->getNS())); + // True if we need to acquire an X lock on the database in order to create the system.profile + // collection. + bool acquireDbXLock = false; + try { - // Even if the operation we are profiling was interrupted, we still want to output the - // profiler entry. This lock guard will prevent lock acquisitions from throwing exceptions - // before we finish writing the entry. However, our maximum lock timeout overrides - // uninterruptibility. - boost::optional<UninterruptibleLockGuard> noInterrupt; + // Set the opCtx to be only interruptible for replication state changes. This is needed + // because for some operations that are already marked as killed due to errors such as + // operation time exceeding maxTimeMS, we still want to output the profiler entry. Thus + // in these cases we do not interrupt lock acquisition even though the opCtx has already + // been killed. In the meantime we need to make sure replication state changes can still + // interrupt lock acquisition, otherwise there could be deadlocks when the state change + // thread is waiting for the session checked out by this opCtx while holding RSTL lock. + // However when maxLockTimeout is set, we want it to be always interruptible. if (!opCtx->lockState()->hasMaxLockTimeout()) { - noInterrupt.emplace(opCtx->lockState()); + opCtx->setIgnoreInterruptsExceptForReplStateChange(true); } - bool acquireDbXLock = false; + // IX lock acquisitions beyond this block will not be related to writes to system.profile. + ON_BLOCK_EXIT([opCtx] { opCtx->setIgnoreInterruptsExceptForReplStateChange(false); }); + while (true) { std::unique_ptr<AutoGetDb> autoGetDb; if (acquireDbXLock) { + // We should not attempt to acquire an X lock while opCtx ignores interrupts. + opCtx->setIgnoreInterruptsExceptForReplStateChange(false); + autoGetDb.reset(new AutoGetDb(opCtx, dbName, MODE_X)); if (autoGetDb->getDb()) { createProfileCollection(opCtx, autoGetDb->getDb()).transitional_ignore(); diff --git a/src/mongo/db/operation_context.cpp b/src/mongo/db/operation_context.cpp index 02a3b1226d5..3775b73a82f 100644 --- a/src/mongo/db/operation_context.cpp +++ b/src/mongo/db/operation_context.cpp @@ -196,6 +196,14 @@ bool opShouldFail(Client* client, const BSONObj& failPointInfo) { } // namespace Status OperationContext::checkForInterruptNoAssert() { + const auto killStatus = getKillStatus(); + + if (_ignoreInterruptsExceptForReplStateChange && + killStatus != ErrorCodes::InterruptedDueToReplStateChange && + !_killRequestedForReplStateChange.loadRelaxed()) { + return Status::OK(); + } + // TODO: Remove the MONGO_likely(getClient()) once all operation contexts are constructed with // clients. if (MONGO_likely(getClient() && getServiceContext()) && @@ -215,7 +223,6 @@ Status OperationContext::checkForInterruptNoAssert() { } } - const auto killStatus = getKillStatus(); if (killStatus != ErrorCodes::OK) { return Status(killStatus, "operation was interrupted"); } @@ -361,6 +368,14 @@ void OperationContext::markKilled(ErrorCodes::Error killCode) { }); lkWaitMutex = stdx::unique_lock<stdx::mutex>{*_waitMutex}; } + + // Record that a kill was requested on this operationContext due to replication state change + // since it is possible to call markKilled() multiple times but only the first killCode will + // be preserved. + if (killCode == ErrorCodes::InterruptedDueToReplStateChange) { + _killRequestedForReplStateChange.store(true); + } + _killCode.compareAndSwap(ErrorCodes::OK, killCode); if (lkWaitMutex && _numKillers == 0) { invariant(_waitCV); diff --git a/src/mongo/db/operation_context.h b/src/mongo/db/operation_context.h index 4a9bc116da4..a2eacc1e700 100644 --- a/src/mongo/db/operation_context.h +++ b/src/mongo/db/operation_context.h @@ -432,6 +432,14 @@ public: */ Microseconds getRemainingMaxTimeMicros() const; + /** + * Sets that this operation should ignore interruption except for replication state change. Can + * only be called by the thread executing this on behalf of this OperationContext. + */ + void setIgnoreInterruptsExceptForReplStateChange(bool target) { + _ignoreInterruptsExceptForReplStateChange = target; + } + private: /** * Returns true if this operation has a deadline and it has passed according to the fast clock @@ -521,6 +529,9 @@ private: Timer _elapsedTime; bool _writesAreReplicated = true; + + bool _ignoreInterruptsExceptForReplStateChange = false; + AtomicWord<bool> _killRequestedForReplStateChange{false}; }; namespace repl { |