summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDianna Hohensee <dianna.hohensee@mongodb.com>2022-09-02 03:32:06 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-09-02 04:47:05 +0000
commit97cdfef9e4f74918176f1916bc53ada8f9f66868 (patch)
tree641eb38b0c182d564474a80c0cb9eac0e7b47e69
parent8de369efe3fb47e15000f6a8af722b53911392e6 (diff)
downloadmongo-97cdfef9e4f74918176f1916bc53ada8f9f66868.tar.gz
SERVER-69059 Create an InterruptibleLockGuard to place in ClientCursorPin to ensure read operations are interruptible
-rw-r--r--jstests/replsets/initial_sync_during_stepdown.js4
-rw-r--r--src/mongo/db/catalog/rename_collection.cpp4
-rw-r--r--src/mongo/db/clientcursor.cpp8
-rw-r--r--src/mongo/db/clientcursor.h14
-rw-r--r--src/mongo/db/commands/getmore_cmd.cpp39
-rw-r--r--src/mongo/db/concurrency/locker.h41
-rw-r--r--src/mongo/db/exhaust_cursor_currentop_integration_test.cpp2
7 files changed, 67 insertions, 45 deletions
diff --git a/jstests/replsets/initial_sync_during_stepdown.js b/jstests/replsets/initial_sync_during_stepdown.js
index 342d57d141d..fc3c00564e7 100644
--- a/jstests/replsets/initial_sync_during_stepdown.js
+++ b/jstests/replsets/initial_sync_during_stepdown.js
@@ -48,7 +48,7 @@ function setupTest({
jsTestLog("Enabling failpoint '" + failPoint + "' on primary (sync source).");
assert.commandWorked(primary.adminCommand({
configureFailPoint: failPoint,
- data: {nss: nss + nssSuffix, shouldCheckForInterrupt: true, shouldNotdropLock: true},
+ data: {nss: nss + nssSuffix, shouldCheckForInterrupt: true},
mode: "alwaysOn"
}));
@@ -164,7 +164,7 @@ assert.commandWorked(primaryColl.insert([{_id: 3}, {_id: 4}]));
// such that it doesn't drop locks when getmore cmd waits inside the fail point block.
assert.commandWorked(primary.adminCommand({
configureFailPoint: "waitWithPinnedCursorDuringGetMoreBatch",
- data: {nss: oplogNss, shouldCheckForInterrupt: true, shouldNotdropLock: true},
+ data: {nss: oplogNss, shouldCheckForInterrupt: true},
mode: "alwaysOn"
}));
diff --git a/src/mongo/db/catalog/rename_collection.cpp b/src/mongo/db/catalog/rename_collection.cpp
index 0f4a7fcea93..8d7e315cb25 100644
--- a/src/mongo/db/catalog/rename_collection.cpp
+++ b/src/mongo/db/catalog/rename_collection.cpp
@@ -206,10 +206,6 @@ Status renameCollectionDirectly(OperationContext* opCtx,
return status;
}
- // Rename is not resilient to interruption when the onRenameCollection OpObserver
- // takes an oplog collection lock.
- UninterruptibleLockGuard noInterrupt(opCtx->lockState());
-
// We have to override the provided 'dropTarget' setting for idempotency reasons to
// avoid unintentionally removing a collection on a secondary with the same name as
// the target.
diff --git a/src/mongo/db/clientcursor.cpp b/src/mongo/db/clientcursor.cpp
index 860e5027fc1..2a471eb1288 100644
--- a/src/mongo/db/clientcursor.cpp
+++ b/src/mongo/db/clientcursor.cpp
@@ -164,7 +164,10 @@ GenericCursor ClientCursor::toGenericCursor() const {
ClientCursorPin::ClientCursorPin(OperationContext* opCtx,
ClientCursor* cursor,
CursorManager* cursorManager)
- : _opCtx(opCtx), _cursor(cursor), _cursorManager(cursorManager) {
+ : _opCtx(opCtx),
+ _cursor(cursor),
+ _cursorManager(cursorManager),
+ _interruptibleLockGuard(std::make_unique<InterruptibleLockGuard>(opCtx->lockState())) {
invariant(_cursor);
invariant(_cursor->_operationUsingCursor);
invariant(!_cursor->_disposed);
@@ -181,6 +184,7 @@ ClientCursorPin::ClientCursorPin(ClientCursorPin&& other)
: _opCtx(other._opCtx),
_cursor(other._cursor),
_cursorManager(other._cursorManager),
+ _interruptibleLockGuard(std::move(other._interruptibleLockGuard)),
_shouldSaveRecoveryUnit(other._shouldSaveRecoveryUnit) {
// The pinned cursor is being transferred to us from another pin. The 'other' pin must have a
// pinned cursor.
@@ -217,6 +221,8 @@ ClientCursorPin& ClientCursorPin::operator=(ClientCursorPin&& other) {
_cursorManager = other._cursorManager;
other._cursorManager = nullptr;
+ _interruptibleLockGuard = std::move(other._interruptibleLockGuard);
+
_shouldSaveRecoveryUnit = other._shouldSaveRecoveryUnit;
other._shouldSaveRecoveryUnit = false;
diff --git a/src/mongo/db/clientcursor.h b/src/mongo/db/clientcursor.h
index 121566cbdb8..89239e6230d 100644
--- a/src/mongo/db/clientcursor.h
+++ b/src/mongo/db/clientcursor.h
@@ -541,6 +541,20 @@ private:
OperationContext* _opCtx = nullptr;
ClientCursor* _cursor = nullptr;
CursorManager* _cursorManager = nullptr;
+
+ // A pinned cursor takes ownership of storage resources (storage-level cursors owned by the
+ // PlanExecutor) without lock-manager locks. Such an operation must ensure interruptibility when
+ // later acquiring a lock in order to avoid deadlocking with replication rollback at the storage
+ // engine level. Rollback signals interrupt to active readers, acquires a global X lock and then
+ // waits for all storage cursors to be closed in order to proceed; while a pinned cursor
+ // operation holds storage-level cursors and then may try to acquire a lock.
+ //
+ // An operation holding a pinned cursor must never have an UninterruptibleLockGuard on the stack
+ // that causes lock acquisition to hang without checking for interrupt. This
+ // InterruptibleLockGuard ensures that operations holding a ClientCursorPin will eventually
+ // observe and obey interrupt signals in the locking layer.
+ std::unique_ptr<InterruptibleLockGuard> _interruptibleLockGuard;
+
bool _shouldSaveRecoveryUnit = false;
};
diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp
index 532d5996631..ec99c214b1f 100644
--- a/src/mongo/db/commands/getmore_cmd.cpp
+++ b/src/mongo/db/commands/getmore_cmd.cpp
@@ -533,34 +533,12 @@ public:
opCtx, nss, true));
}
- // If the 'waitAfterPinningCursorBeforeGetMoreBatch' fail point is enabled, set the
- // 'msg' field of this operation's CurOp to signal that we've hit this point and then
- // repeatedly release and re-acquire the collection readLock at regular intervals until
- // the failpoint is released. This is done in order to avoid deadlocks caused by the
- // pinned-cursor failpoints in this file (see SERVER-21997).
- std::function<void()> dropAndReacquireReadLockIfLocked =
- [&readLock, opCtx, &cursorPin]() {
- if (!readLock) {
- // This function is a no-op if 'readLock' is not held in the first place.
- return;
- }
-
- // Make sure an interrupted operation does not prevent us from reacquiring the
- // lock.
- UninterruptibleLockGuard noInterrupt(opCtx->lockState());
- readLock.reset();
- readLock.emplace(opCtx,
- cursorPin->getExecutor()->nss(),
- AutoGetCollectionViewMode::kViewsForbidden,
- Date_t::max(),
- cursorPin->getExecutor()->getSecondaryNamespaces());
- };
if (MONGO_unlikely(waitAfterPinningCursorBeforeGetMoreBatch.shouldFail())) {
CurOpFailpointHelpers::waitWhileFailPointEnabled(
&waitAfterPinningCursorBeforeGetMoreBatch,
opCtx,
"waitAfterPinningCursorBeforeGetMoreBatch",
- dropAndReacquireReadLockIfLocked,
+ []() {}, /*empty function*/
nss);
}
@@ -641,25 +619,12 @@ public:
awaitDataState(opCtx).shouldWaitForInserts = true;
}
- // We're about to begin running the PlanExecutor in order to fill the getMore batch. If
- // the 'waitWithPinnedCursorDuringGetMoreBatch' failpoint is active, set the 'msg' field
- // of this operation's CurOp to signal that we've hit this point and then spin until the
- // failpoint is released.
- std::function<void()> saveAndRestoreStateWithReadLockReacquisition =
- [exec, dropAndReacquireReadLockIfLocked, &readLock]() {
- exec->saveState();
- dropAndReacquireReadLockIfLocked();
- exec->restoreState(readLock ? &readLock->getCollection() : nullptr);
- };
-
waitWithPinnedCursorDuringGetMoreBatch.execute([&](const BSONObj& data) {
CurOpFailpointHelpers::waitWhileFailPointEnabled(
&waitWithPinnedCursorDuringGetMoreBatch,
opCtx,
"waitWithPinnedCursorDuringGetMoreBatch",
- data["shouldNotdropLock"].booleanSafe()
- ? []() {} /*empty function*/
- : saveAndRestoreStateWithReadLockReacquisition,
+ []() {}, /*empty function*/
nss);
});
diff --git a/src/mongo/db/concurrency/locker.h b/src/mongo/db/concurrency/locker.h
index fdb7d816da3..bd7a80fe19e 100644
--- a/src/mongo/db/concurrency/locker.h
+++ b/src/mongo/db/concurrency/locker.h
@@ -52,6 +52,7 @@ class Locker {
Locker& operator=(const Locker&) = delete;
friend class UninterruptibleLockGuard;
+ friend class InterruptibleLockGuard;
public:
virtual ~Locker() {}
@@ -567,6 +568,13 @@ protected:
int _uninterruptibleLocksRequested = 0;
/**
+ * The number of callers that are guarding against uninterruptible lock requests. An int,
+ * instead of a boolean, to support multiple simultaneous requests. When > 0, ensures that
+ * _uninterruptibleLocksRequested above is _not_ used.
+ */
+ int _keepInterruptibleRequests = 0;
+
+ /**
* The number of LockRequests to unlock at the end of this WUOW. This is used for locks
* participating in two-phase locking.
*/
@@ -601,6 +609,7 @@ public:
*/
explicit UninterruptibleLockGuard(Locker* locker) : _locker(locker) {
invariant(_locker);
+ invariant(_locker->_keepInterruptibleRequests == 0);
invariant(_locker->_uninterruptibleLocksRequested >= 0);
invariant(_locker->_uninterruptibleLocksRequested < std::numeric_limits<int>::max());
_locker->_uninterruptibleLocksRequested += 1;
@@ -616,6 +625,38 @@ private:
};
/**
+ * This RAII type ensures that there are no uninterruptible lock acquisitions while in scope. If an
+ * UninterruptibleLockGuard is held at a higher level, or taken at a lower level, an invariant
+ * failure will occur. This protects against UninterruptibleLockGuard uses on code paths that must
+ * be interruptible. Safe to nest InterruptibleLockGuard instances.
+ */
+class InterruptibleLockGuard {
+ InterruptibleLockGuard(const InterruptibleLockGuard& other) = delete;
+ InterruptibleLockGuard(InterruptibleLockGuard&& other) = delete;
+
+public:
+ /*
+ * Accepts a Locker, and increments the Locker's _keepInterruptibleRequests counter. Decrements
+ * the counter when destroyed.
+ */
+ explicit InterruptibleLockGuard(Locker* locker) : _locker(locker) {
+ invariant(_locker);
+ invariant(_locker->_uninterruptibleLocksRequested == 0);
+ invariant(_locker->_keepInterruptibleRequests >= 0);
+ invariant(_locker->_keepInterruptibleRequests < std::numeric_limits<int>::max());
+ _locker->_keepInterruptibleRequests += 1;
+ }
+
+ ~InterruptibleLockGuard() {
+ invariant(_locker->_keepInterruptibleRequests > 0);
+ _locker->_keepInterruptibleRequests -= 1;
+ }
+
+private:
+ Locker* const _locker;
+};
+
+/**
* RAII-style class to opt out of replication's use of the ParallelBatchWriterMode lock.
*/
class ShouldNotConflictWithSecondaryBatchApplicationBlock {
diff --git a/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp b/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp
index ca248706cf8..3bb0217b72b 100644
--- a/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp
+++ b/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp
@@ -76,7 +76,7 @@ void setWaitWithPinnedCursorDuringGetMoreBatchFailpoint(DBClientBase* conn, bool
auto cmdObj = BSON("configureFailPoint"
<< "waitWithPinnedCursorDuringGetMoreBatch"
<< "mode" << (enable ? "alwaysOn" : "off") << "data"
- << BSON("shouldNotdropLock" << true << "shouldContinueOnInterrupt" << true));
+ << BSON("shouldContinueOnInterrupt" << true));
auto reply = conn->runCommand(OpMsgRequest::fromDBAndBody("admin", cmdObj));
ASSERT_OK(getStatusFromCommandResult(reply->getCommandReply()));
}