summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWenbin Zhu <wenbin.zhu@mongodb.com>2021-09-03 23:06:13 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-10-21 15:56:08 +0000
commit46d180e4f463f96b7d5f64c73e43204fd47f5fd4 (patch)
tree87dbfff6f95546dc0f99dcf3d49da174ca202cb0
parent5500c2b6e8a0fad7e779d003feb0d3c9f92f1c77 (diff)
downloadmongo-46d180e4f463f96b7d5f64c73e43204fd47f5fd4.tar.gz
SERVER-59226 Fix deadlock between uninterruptible profiling operation and stepdown thread.
(cherry picked from commit 43479818bd01f27ee25b6e992045529d2ac0185a)
-rw-r--r--src/mongo/db/introspect.cpp22
-rw-r--r--src/mongo/db/operation_context.cpp16
-rw-r--r--src/mongo/db/operation_context.h11
3 files changed, 40 insertions, 9 deletions
diff --git a/src/mongo/db/introspect.cpp b/src/mongo/db/introspect.cpp
index a23d80ecbcc..ec7a744420b 100644
--- a/src/mongo/db/introspect.cpp
+++ b/src/mongo/db/introspect.cpp
@@ -119,20 +119,26 @@ void profile(OperationContext* opCtx, NetworkOp op) {
bool acquireDbXLock = false;
try {
- // Even if the operation we are profiling was interrupted, we still want to output the
- // profiler entry. This lock guard will prevent lock acquisitions from throwing exceptions
- // before we finish writing the entry. However, our maximum lock timeout overrides
- // uninterruptibility.
- boost::optional<UninterruptibleLockGuard> noInterrupt;
+ // Set the opCtx to be only interruptible for replication state changes. This is needed
+ // because for some operations that are already marked as killed due to errors such as
+ // operation time exceeding maxTimeMS, we still want to output the profiler entry. Thus
+ // in these cases we do not interrupt lock acquisition even though the opCtx has already
+ // been killed. In the meantime we need to make sure replication state changes can still
+ // interrupt lock acquisition, otherwise there could be deadlocks when the state change
+ // thread is waiting for the session checked out by this opCtx while holding RSTL lock.
+ // However when maxLockTimeout is set, we want it to be always interruptible.
if (!opCtx->lockState()->hasMaxLockTimeout()) {
- noInterrupt.emplace(opCtx->lockState());
+ opCtx->setIgnoreInterruptsExceptForReplStateChange(true);
}
+ // IX lock acquisitions beyond this block will not be related to writes to system.profile.
+ ON_BLOCK_EXIT([opCtx] { opCtx->setIgnoreInterruptsExceptForReplStateChange(false); });
+
while (true) {
std::unique_ptr<AutoGetDb> autoGetDb;
if (acquireDbXLock) {
- // We should not attempt to acquire an X lock while in "noInterrupt" scope.
- noInterrupt.reset();
+ // We should not attempt to acquire an X lock while opCtx ignores interrupts.
+ opCtx->setIgnoreInterruptsExceptForReplStateChange(false);
autoGetDb.reset(new AutoGetDb(opCtx, dbName, MODE_X));
if (autoGetDb->getDb()) {
diff --git a/src/mongo/db/operation_context.cpp b/src/mongo/db/operation_context.cpp
index 02e8b511503..9f16861e985 100644
--- a/src/mongo/db/operation_context.cpp
+++ b/src/mongo/db/operation_context.cpp
@@ -202,6 +202,14 @@ bool opShouldFail(Client* client, const BSONObj& failPointInfo) {
} // namespace
Status OperationContext::checkForInterruptNoAssert() noexcept {
+ const auto killStatus = getKillStatus();
+
+ if (_ignoreInterruptsExceptForReplStateChange &&
+ killStatus != ErrorCodes::InterruptedDueToReplStateChange &&
+ !_killRequestedForReplStateChange.loadRelaxed()) {
+ return Status::OK();
+ }
+
// TODO: Remove the MONGO_likely(hasClientAndServiceContext) once all operation contexts are
// constructed with clients.
const auto hasClientAndServiceContext = getClient() && getServiceContext();
@@ -234,7 +242,6 @@ Status OperationContext::checkForInterruptNoAssert() noexcept {
}
}
- const auto killStatus = getKillStatus();
if (killStatus != ErrorCodes::OK) {
return Status(killStatus, "operation was interrupted");
}
@@ -327,6 +334,13 @@ void OperationContext::markKilled(ErrorCodes::Error killCode) {
log() << "operation was interrupted because a client disconnected";
}
+ // Record that a kill was requested on this operationContext due to replication state change
+ // since it is possible to call markKilled() multiple times but only the first killCode will
+ // be preserved.
+ if (killCode == ErrorCodes::InterruptedDueToReplStateChange) {
+ _killRequestedForReplStateChange.store(true);
+ }
+
if (auto status = ErrorCodes::OK; _killCode.compareAndSwap(&status, killCode)) {
_baton->notify();
}
diff --git a/src/mongo/db/operation_context.h b/src/mongo/db/operation_context.h
index 38dae63a4c9..718c0c43af4 100644
--- a/src/mongo/db/operation_context.h
+++ b/src/mongo/db/operation_context.h
@@ -376,6 +376,14 @@ public:
}
/**
+ * Sets that this operation should ignore interruption except for replication state change. Can
+ * only be called by the thread executing this on behalf of this OperationContext.
+ */
+ void setIgnoreInterruptsExceptForReplStateChange(bool target) {
+ _ignoreInterruptsExceptForReplStateChange = target;
+ }
+
+ /**
* Clears metadata associated with a multi-document transaction.
*/
void resetMultiDocumentTransactionState() {
@@ -531,10 +539,13 @@ private:
bool _writesAreReplicated = true;
bool _shouldParticipateInFlowControl = true;
bool _inMultiDocumentTransaction = false;
+ bool _ignoreInterruptsExceptForReplStateChange = false;
// If true, this OpCtx will get interrupted during replica set stepUp and stepDown, regardless
// of what locks it's taken.
AtomicWord<bool> _alwaysInterruptAtStepDownOrUp{false};
+
+ AtomicWord<bool> _killRequestedForReplStateChange{false};
};
namespace repl {