diff options
-rw-r--r-- | jstests/replsets/initial_sync_during_stepdown.js | 4 | ||||
-rw-r--r-- | src/mongo/db/catalog/rename_collection.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/clientcursor.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/clientcursor.h | 14 | ||||
-rw-r--r-- | src/mongo/db/commands/getmore_cmd.cpp | 39 | ||||
-rw-r--r-- | src/mongo/db/concurrency/locker.h | 41 | ||||
-rw-r--r-- | src/mongo/db/exhaust_cursor_currentop_integration_test.cpp | 2 |
7 files changed, 67 insertions, 45 deletions
diff --git a/jstests/replsets/initial_sync_during_stepdown.js b/jstests/replsets/initial_sync_during_stepdown.js index 342d57d141d..fc3c00564e7 100644 --- a/jstests/replsets/initial_sync_during_stepdown.js +++ b/jstests/replsets/initial_sync_during_stepdown.js @@ -48,7 +48,7 @@ function setupTest({ jsTestLog("Enabling failpoint '" + failPoint + "' on primary (sync source)."); assert.commandWorked(primary.adminCommand({ configureFailPoint: failPoint, - data: {nss: nss + nssSuffix, shouldCheckForInterrupt: true, shouldNotdropLock: true}, + data: {nss: nss + nssSuffix, shouldCheckForInterrupt: true}, mode: "alwaysOn" })); @@ -164,7 +164,7 @@ assert.commandWorked(primaryColl.insert([{_id: 3}, {_id: 4}])); // such that it doesn't drop locks when getmore cmd waits inside the fail point block. assert.commandWorked(primary.adminCommand({ configureFailPoint: "waitWithPinnedCursorDuringGetMoreBatch", - data: {nss: oplogNss, shouldCheckForInterrupt: true, shouldNotdropLock: true}, + data: {nss: oplogNss, shouldCheckForInterrupt: true}, mode: "alwaysOn" })); diff --git a/src/mongo/db/catalog/rename_collection.cpp b/src/mongo/db/catalog/rename_collection.cpp index 0f4a7fcea93..8d7e315cb25 100644 --- a/src/mongo/db/catalog/rename_collection.cpp +++ b/src/mongo/db/catalog/rename_collection.cpp @@ -206,10 +206,6 @@ Status renameCollectionDirectly(OperationContext* opCtx, return status; } - // Rename is not resilient to interruption when the onRenameCollection OpObserver - // takes an oplog collection lock. - UninterruptibleLockGuard noInterrupt(opCtx->lockState()); - // We have to override the provided 'dropTarget' setting for idempotency reasons to // avoid unintentionally removing a collection on a secondary with the same name as // the target. diff --git a/src/mongo/db/clientcursor.cpp b/src/mongo/db/clientcursor.cpp index 860e5027fc1..2a471eb1288 100644 --- a/src/mongo/db/clientcursor.cpp +++ b/src/mongo/db/clientcursor.cpp @@ -164,7 +164,10 @@ GenericCursor ClientCursor::toGenericCursor() const { ClientCursorPin::ClientCursorPin(OperationContext* opCtx, ClientCursor* cursor, CursorManager* cursorManager) - : _opCtx(opCtx), _cursor(cursor), _cursorManager(cursorManager) { + : _opCtx(opCtx), + _cursor(cursor), + _cursorManager(cursorManager), + _interruptibleLockGuard(std::make_unique<InterruptibleLockGuard>(opCtx->lockState())) { invariant(_cursor); invariant(_cursor->_operationUsingCursor); invariant(!_cursor->_disposed); @@ -181,6 +184,7 @@ ClientCursorPin::ClientCursorPin(ClientCursorPin&& other) : _opCtx(other._opCtx), _cursor(other._cursor), _cursorManager(other._cursorManager), + _interruptibleLockGuard(std::move(other._interruptibleLockGuard)), _shouldSaveRecoveryUnit(other._shouldSaveRecoveryUnit) { // The pinned cursor is being transferred to us from another pin. The 'other' pin must have a // pinned cursor. @@ -217,6 +221,8 @@ ClientCursorPin& ClientCursorPin::operator=(ClientCursorPin&& other) { _cursorManager = other._cursorManager; other._cursorManager = nullptr; + _interruptibleLockGuard = std::move(other._interruptibleLockGuard); + _shouldSaveRecoveryUnit = other._shouldSaveRecoveryUnit; other._shouldSaveRecoveryUnit = false; diff --git a/src/mongo/db/clientcursor.h b/src/mongo/db/clientcursor.h index 121566cbdb8..89239e6230d 100644 --- a/src/mongo/db/clientcursor.h +++ b/src/mongo/db/clientcursor.h @@ -541,6 +541,20 @@ private: OperationContext* _opCtx = nullptr; ClientCursor* _cursor = nullptr; CursorManager* _cursorManager = nullptr; + + // A pinned cursor takes ownership of storage resources (storage-level cursors owned by the + // PlanExecutor) without lock-manager locks. Such an operation must ensure interruptibility when + // later acquiring a lock in order to avoid deadlocking with replication rollback at the storage + // engine level. Rollback signals interrupt to active readers, acquires a global X lock and then + // waits for all storage cursors to be closed in order to proceed; while a pinned cursor + // operation holds storage-level cursors and then may try to acquire a lock. + // + // An operation holding a pinned cursor must never have an UninterruptibleLockGuard on the stack + // that causes lock acquisition to hang without checking for interrupt. This + // InterruptibleLockGuard ensures that operations holding a ClientCursorPin will eventually + // observe and obey interrupt signals in the locking layer. + std::unique_ptr<InterruptibleLockGuard> _interruptibleLockGuard; + bool _shouldSaveRecoveryUnit = false; }; diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp index 532d5996631..ec99c214b1f 100644 --- a/src/mongo/db/commands/getmore_cmd.cpp +++ b/src/mongo/db/commands/getmore_cmd.cpp @@ -533,34 +533,12 @@ public: opCtx, nss, true)); } - // If the 'waitAfterPinningCursorBeforeGetMoreBatch' fail point is enabled, set the - // 'msg' field of this operation's CurOp to signal that we've hit this point and then - // repeatedly release and re-acquire the collection readLock at regular intervals until - // the failpoint is released. This is done in order to avoid deadlocks caused by the - // pinned-cursor failpoints in this file (see SERVER-21997). - std::function<void()> dropAndReacquireReadLockIfLocked = - [&readLock, opCtx, &cursorPin]() { - if (!readLock) { - // This function is a no-op if 'readLock' is not held in the first place. - return; - } - - // Make sure an interrupted operation does not prevent us from reacquiring the - // lock. - UninterruptibleLockGuard noInterrupt(opCtx->lockState()); - readLock.reset(); - readLock.emplace(opCtx, - cursorPin->getExecutor()->nss(), - AutoGetCollectionViewMode::kViewsForbidden, - Date_t::max(), - cursorPin->getExecutor()->getSecondaryNamespaces()); - }; if (MONGO_unlikely(waitAfterPinningCursorBeforeGetMoreBatch.shouldFail())) { CurOpFailpointHelpers::waitWhileFailPointEnabled( &waitAfterPinningCursorBeforeGetMoreBatch, opCtx, "waitAfterPinningCursorBeforeGetMoreBatch", - dropAndReacquireReadLockIfLocked, + []() {}, /*empty function*/ nss); } @@ -641,25 +619,12 @@ public: awaitDataState(opCtx).shouldWaitForInserts = true; } - // We're about to begin running the PlanExecutor in order to fill the getMore batch. If - // the 'waitWithPinnedCursorDuringGetMoreBatch' failpoint is active, set the 'msg' field - // of this operation's CurOp to signal that we've hit this point and then spin until the - // failpoint is released. - std::function<void()> saveAndRestoreStateWithReadLockReacquisition = - [exec, dropAndReacquireReadLockIfLocked, &readLock]() { - exec->saveState(); - dropAndReacquireReadLockIfLocked(); - exec->restoreState(readLock ? &readLock->getCollection() : nullptr); - }; - waitWithPinnedCursorDuringGetMoreBatch.execute([&](const BSONObj& data) { CurOpFailpointHelpers::waitWhileFailPointEnabled( &waitWithPinnedCursorDuringGetMoreBatch, opCtx, "waitWithPinnedCursorDuringGetMoreBatch", - data["shouldNotdropLock"].booleanSafe() - ? []() {} /*empty function*/ - : saveAndRestoreStateWithReadLockReacquisition, + []() {}, /*empty function*/ nss); }); diff --git a/src/mongo/db/concurrency/locker.h b/src/mongo/db/concurrency/locker.h index fdb7d816da3..bd7a80fe19e 100644 --- a/src/mongo/db/concurrency/locker.h +++ b/src/mongo/db/concurrency/locker.h @@ -52,6 +52,7 @@ class Locker { Locker& operator=(const Locker&) = delete; friend class UninterruptibleLockGuard; + friend class InterruptibleLockGuard; public: virtual ~Locker() {} @@ -567,6 +568,13 @@ protected: int _uninterruptibleLocksRequested = 0; /** + * The number of callers that are guarding against uninterruptible lock requests. An int, + * instead of a boolean, to support multiple simultaneous requests. When > 0, ensures that + * _uninterruptibleLocksRequested above is _not_ used. + */ + int _keepInterruptibleRequests = 0; + + /** * The number of LockRequests to unlock at the end of this WUOW. This is used for locks * participating in two-phase locking. */ @@ -601,6 +609,7 @@ public: */ explicit UninterruptibleLockGuard(Locker* locker) : _locker(locker) { invariant(_locker); + invariant(_locker->_keepInterruptibleRequests == 0); invariant(_locker->_uninterruptibleLocksRequested >= 0); invariant(_locker->_uninterruptibleLocksRequested < std::numeric_limits<int>::max()); _locker->_uninterruptibleLocksRequested += 1; @@ -616,6 +625,38 @@ private: }; /** + * This RAII type ensures that there are no uninterruptible lock acquisitions while in scope. If an + * UninterruptibleLockGuard is held at a higher level, or taken at a lower level, an invariant will + * occur. This protects against UninterruptibleLockGuard uses on code paths that must be + * interruptible. Safe to nest InterruptibleLockGuard instances. + */ +class InterruptibleLockGuard { + InterruptibleLockGuard(const InterruptibleLockGuard& other) = delete; + InterruptibleLockGuard(InterruptibleLockGuard&& other) = delete; + +public: + /* + * Accepts a Locker, and increments the Locker's _keepInterruptibleRequests counter. Decrements + * the counter when destroyed. + */ + explicit InterruptibleLockGuard(Locker* locker) : _locker(locker) { + invariant(_locker); + invariant(_locker->_uninterruptibleLocksRequested == 0); + invariant(_locker->_keepInterruptibleRequests >= 0); + invariant(_locker->_keepInterruptibleRequests < std::numeric_limits<int>::max()); + _locker->_keepInterruptibleRequests += 1; + } + + ~InterruptibleLockGuard() { + invariant(_locker->_keepInterruptibleRequests > 0); + _locker->_keepInterruptibleRequests -= 1; + } + +private: + Locker* const _locker; +}; + +/** * RAII-style class to opt out of replication's use of the ParallelBatchWriterMode lock. */ class ShouldNotConflictWithSecondaryBatchApplicationBlock { diff --git a/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp b/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp index ca248706cf8..3bb0217b72b 100644 --- a/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp +++ b/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp @@ -76,7 +76,7 @@ void setWaitWithPinnedCursorDuringGetMoreBatchFailpoint(DBClientBase* conn, bool auto cmdObj = BSON("configureFailPoint" << "waitWithPinnedCursorDuringGetMoreBatch" << "mode" << (enable ? "alwaysOn" : "off") << "data" - << BSON("shouldNotdropLock" << true << "shouldContinueOnInterrupt" << true)); + << BSON("shouldContinueOnInterrupt" << true)); auto reply = conn->runCommand(OpMsgRequest::fromDBAndBody("admin", cmdObj)); ASSERT_OK(getStatusFromCommandResult(reply->getCommandReply())); } |