summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWenbin Zhu <wenbin.zhu@mongodb.com>2021-11-01 20:37:20 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-11-01 20:51:44 +0000
commit8443b63b12e53d21590b1f7dcf02c5df26b9ff40 (patch)
tree93eaa88ddf26d7cc996f03654ff2d2095bdbec2c
parentd1a9e68d581f8e3162bc790bdb30986b8d030d86 (diff)
downloadmongo-8443b63b12e53d21590b1f7dcf02c5df26b9ff40.tar.gz
Revert "SERVER-59226 Fix deadlock between uninterruptible profiling operation and stepdown thread."
This reverts commit d1a9e68d581f8e3162bc790bdb30986b8d030d86.
-rw-r--r--src/mongo/db/introspect.cpp28
-rw-r--r--src/mongo/db/operation_context.cpp17
-rw-r--r--src/mongo/db/operation_context.h11
3 files changed, 9 insertions, 47 deletions
diff --git a/src/mongo/db/introspect.cpp b/src/mongo/db/introspect.cpp
index 78ae238a239..8bda2543426 100644
--- a/src/mongo/db/introspect.cpp
+++ b/src/mongo/db/introspect.cpp
@@ -50,9 +50,9 @@
namespace mongo {
+using std::unique_ptr;
using std::endl;
using std::string;
-using std::unique_ptr;
namespace {
@@ -117,32 +117,20 @@ void profile(OperationContext* opCtx, NetworkOp op) {
const string dbName(nsToDatabase(CurOp::get(opCtx)->getNS()));
- // True if we need to acquire an X lock on the database in order to create the system.profile
- // collection.
- bool acquireDbXLock = false;
-
try {
- // Set the opCtx to be only interruptible for replication state changes. This is needed
- // because for some operations that are already marked as killed due to errors such as
- // operation time exceeding maxTimeMS, we still want to output the profiler entry. Thus
- // in these cases we do not interrupt lock acquisition even though the opCtx has already
- // been killed. In the meantime we need to make sure replication state changes can still
- // interrupt lock acquisition, otherwise there could be deadlocks when the state change
- // thread is waiting for the session checked out by this opCtx while holding RSTL lock.
- // However when maxLockTimeout is set, we want it to be always interruptible.
+ // Even if the operation we are profiling was interrupted, we still want to output the
+ // profiler entry. This lock guard will prevent lock acquisitions from throwing exceptions
+ // before we finish writing the entry. However, our maximum lock timeout overrides
+ // uninterruptibility.
+ boost::optional<UninterruptibleLockGuard> noInterrupt;
if (!opCtx->lockState()->hasMaxLockTimeout()) {
- opCtx->setIgnoreInterruptsExceptForReplStateChange(true);
+ noInterrupt.emplace(opCtx->lockState());
}
- // IX lock acquisitions beyond this block will not be related to writes to system.profile.
- ON_BLOCK_EXIT([opCtx] { opCtx->setIgnoreInterruptsExceptForReplStateChange(false); });
-
+ bool acquireDbXLock = false;
while (true) {
std::unique_ptr<AutoGetDb> autoGetDb;
if (acquireDbXLock) {
- // We should not attempt to acquire an X lock while opCtx ignores interrupts.
- opCtx->setIgnoreInterruptsExceptForReplStateChange(false);
-
autoGetDb.reset(new AutoGetDb(opCtx, dbName, MODE_X));
if (autoGetDb->getDb()) {
createProfileCollection(opCtx, autoGetDb->getDb()).transitional_ignore();
diff --git a/src/mongo/db/operation_context.cpp b/src/mongo/db/operation_context.cpp
index 3775b73a82f..02a3b1226d5 100644
--- a/src/mongo/db/operation_context.cpp
+++ b/src/mongo/db/operation_context.cpp
@@ -196,14 +196,6 @@ bool opShouldFail(Client* client, const BSONObj& failPointInfo) {
} // namespace
Status OperationContext::checkForInterruptNoAssert() {
- const auto killStatus = getKillStatus();
-
- if (_ignoreInterruptsExceptForReplStateChange &&
- killStatus != ErrorCodes::InterruptedDueToReplStateChange &&
- !_killRequestedForReplStateChange.loadRelaxed()) {
- return Status::OK();
- }
-
// TODO: Remove the MONGO_likely(getClient()) once all operation contexts are constructed with
// clients.
if (MONGO_likely(getClient() && getServiceContext()) &&
@@ -223,6 +215,7 @@ Status OperationContext::checkForInterruptNoAssert() {
}
}
+ const auto killStatus = getKillStatus();
if (killStatus != ErrorCodes::OK) {
return Status(killStatus, "operation was interrupted");
}
@@ -368,14 +361,6 @@ void OperationContext::markKilled(ErrorCodes::Error killCode) {
});
lkWaitMutex = stdx::unique_lock<stdx::mutex>{*_waitMutex};
}
-
- // Record that a kill was requested on this operationContext due to replication state change
- // since it is possible to call markKilled() multiple times but only the first killCode will
- // be preserved.
- if (killCode == ErrorCodes::InterruptedDueToReplStateChange) {
- _killRequestedForReplStateChange.store(true);
- }
-
_killCode.compareAndSwap(ErrorCodes::OK, killCode);
if (lkWaitMutex && _numKillers == 0) {
invariant(_waitCV);
diff --git a/src/mongo/db/operation_context.h b/src/mongo/db/operation_context.h
index a2eacc1e700..4a9bc116da4 100644
--- a/src/mongo/db/operation_context.h
+++ b/src/mongo/db/operation_context.h
@@ -432,14 +432,6 @@ public:
*/
Microseconds getRemainingMaxTimeMicros() const;
- /**
- * Sets that this operation should ignore interruption except for replication state change. Can
- * only be called by the thread executing this on behalf of this OperationContext.
- */
- void setIgnoreInterruptsExceptForReplStateChange(bool target) {
- _ignoreInterruptsExceptForReplStateChange = target;
- }
-
private:
/**
* Returns true if this operation has a deadline and it has passed according to the fast clock
@@ -529,9 +521,6 @@ private:
Timer _elapsedTime;
bool _writesAreReplicated = true;
-
- bool _ignoreInterruptsExceptForReplStateChange = false;
- AtomicWord<bool> _killRequestedForReplStateChange{false};
};
namespace repl {