summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWenbin Zhu <wenbin.zhu@mongodb.com>2021-09-03 23:06:13 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-10-20 23:45:32 +0000
commit73ab1b397b1292dd791775e9f2f73fb0b3b4699b (patch)
treefd9d9c4e0ae62e846b21fb78ff707c8aaa90b6d7
parente17453f50625b74c5edaf30a2602e4db764cd240 (diff)
downloadmongo-73ab1b397b1292dd791775e9f2f73fb0b3b4699b.tar.gz
SERVER-59226 Fix deadlock between uninterruptible profiling operation and stepdown thread.
(cherry picked from commit 43479818bd01f27ee25b6e992045529d2ac0185a)
-rw-r--r--src/mongo/db/introspect.cpp28
-rw-r--r--src/mongo/db/operation_context.cpp16
-rw-r--r--src/mongo/db/operation_context.h11
3 files changed, 42 insertions, 13 deletions
diff --git a/src/mongo/db/introspect.cpp b/src/mongo/db/introspect.cpp
index bc3232f31a2..f63cffbd86c 100644
--- a/src/mongo/db/introspect.cpp
+++ b/src/mongo/db/introspect.cpp
@@ -89,21 +89,25 @@ void profile(OperationContext* opCtx, NetworkOp op) {
// replication lag. As such, they should be excluded from Flow Control.
opCtx->setShouldParticipateInFlowControl(false);
+ // Set the opCtx to be only interruptible for replication state changes. This is needed
+ // because for some operations that are already marked as killed due to errors such as
+ // operation time exceeding maxTimeMS, we still want to output the profiler entry. Thus
+ // in these cases we do not interrupt lock acquisition even though the opCtx has already
+ // been killed. In the meantime we need to make sure replication state changes can still
+ // interrupt lock acquisition, otherwise there could be deadlocks when the state change
+ // thread is waiting for the session checked out by this opCtx while holding RSTL lock.
+ // However when maxLockTimeout is set, we want it to be always interruptible.
+ if (!opCtx->lockState()->hasMaxLockTimeout()) {
+ opCtx->setIgnoreInterruptsExceptForReplStateChange(true);
+ }
+
// IX lock acquisitions beyond this block will not be related to writes to system.profile.
- ON_BLOCK_EXIT(
- [opCtx, origFlowControl] { opCtx->setShouldParticipateInFlowControl(origFlowControl); });
+ ON_BLOCK_EXIT([opCtx, origFlowControl] {
+ opCtx->setIgnoreInterruptsExceptForReplStateChange(false);
+ opCtx->setShouldParticipateInFlowControl(origFlowControl);
+ });
try {
-
- // Even if the operation we are profiling was interrupted, we still want to output the
- // profiler entry. This lock guard will prevent lock acquisitions from throwing exceptions
- // before we finish writing the entry. However, our maximum lock timeout overrides
- // uninterruptibility.
- boost::optional<UninterruptibleLockGuard> noInterrupt;
- if (!opCtx->lockState()->hasMaxLockTimeout()) {
- noInterrupt.emplace(opCtx->lockState());
- }
-
const auto dbProfilingNS = NamespaceString(dbName, "system.profile");
AutoGetCollection autoColl(opCtx, dbProfilingNS, MODE_IX);
Database* const db = autoColl.getDb();
diff --git a/src/mongo/db/operation_context.cpp b/src/mongo/db/operation_context.cpp
index 8817793d618..b8cd5935669 100644
--- a/src/mongo/db/operation_context.cpp
+++ b/src/mongo/db/operation_context.cpp
@@ -225,6 +225,14 @@ bool opShouldFail(Client* client, const BSONObj& failPointInfo) {
} // namespace
Status OperationContext::checkForInterruptNoAssert() noexcept {
+ const auto killStatus = getKillStatus();
+
+ if (_ignoreInterruptsExceptForReplStateChange &&
+ killStatus != ErrorCodes::InterruptedDueToReplStateChange &&
+ !_killRequestedForReplStateChange.loadRelaxed()) {
+ return Status::OK();
+ }
+
// TODO: Remove the MONGO_likely(hasClientAndServiceContext) once all operation contexts are
// constructed with clients.
const auto hasClientAndServiceContext = getClient() && getServiceContext();
@@ -257,7 +265,6 @@ Status OperationContext::checkForInterruptNoAssert() noexcept {
},
[&](auto&& data) { return opShouldFail(getClient(), data); });
- const auto killStatus = getKillStatus();
if (killStatus != ErrorCodes::OK) {
if (killStatus == ErrorCodes::TransactionExceededLifetimeLimitSeconds)
return Status(
@@ -356,6 +363,13 @@ void OperationContext::markKilled(ErrorCodes::Error killCode) {
LOGV2(20883, "Interrupted operation as its client disconnected", "opId"_attr = getOpID());
}
+ // Record that a kill was requested on this operationContext due to replication state change
+ // since it is possible to call markKilled() multiple times but only the first killCode will
+ // be preserved.
+ if (killCode == ErrorCodes::InterruptedDueToReplStateChange) {
+ _killRequestedForReplStateChange.store(true);
+ }
+
if (auto status = ErrorCodes::OK; _killCode.compareAndSwap(&status, killCode)) {
_baton->notify();
}
diff --git a/src/mongo/db/operation_context.h b/src/mongo/db/operation_context.h
index 4eed5b9c445..a95cc673387 100644
--- a/src/mongo/db/operation_context.h
+++ b/src/mongo/db/operation_context.h
@@ -428,6 +428,14 @@ public:
}
/**
+ * Sets that this operation should ignore interruption except for replication state change. Can
+ * only be called by the thread executing this on behalf of this OperationContext.
+ */
+ void setIgnoreInterruptsExceptForReplStateChange(bool target) {
+ _ignoreInterruptsExceptForReplStateChange = target;
+ }
+
+ /**
* Clears metadata associated with a multi-document transaction.
*/
void resetMultiDocumentTransactionState() {
@@ -620,11 +628,14 @@ private:
bool _shouldParticipateInFlowControl = true;
bool _inMultiDocumentTransaction = false;
bool _isStartingMultiDocumentTransaction = false;
+ bool _ignoreInterruptsExceptForReplStateChange = false;
// If true, this OpCtx will get interrupted during replica set stepUp and stepDown, regardless
// of what locks it's taken.
AtomicWord<bool> _alwaysInterruptAtStepDownOrUp{false};
+ AtomicWord<bool> _killRequestedForReplStateChange{false};
+
// If populated, this is an owned singleton BSONObj whose only field, 'comment', is a copy of
// the 'comment' field from the input command object.
boost::optional<BSONObj> _comment;