diff options
author | Bernard Gorman <bernard.gorman@gmail.com> | 2018-02-02 16:02:30 +0000 |
---|---|---|
committer | Bernard Gorman <bernard.gorman@gmail.com> | 2018-02-13 11:04:43 +0000 |
commit | 1221b80ee0d65879a4c76ff98f78e92b53766cc0 (patch) | |
tree | a6e4f81b3a82160ba3663daa4e4eaac07fad652e /src | |
parent | ca0a855dfc0f479d85b76a640b12a259c0547310 (diff) | |
download | mongo-1221b80ee0d65879a4c76ff98f78e92b53766cc0.tar.gz |
SERVER-32912 Ensure that killCursors always invalidates a pinned cursor
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/clientcursor.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/clientcursor.h | 6 | ||||
-rw-r--r-- | src/mongo/db/commands/getmore_cmd.cpp | 112 | ||||
-rw-r--r-- | src/mongo/db/cursor_manager.cpp | 27 | ||||
-rw-r--r-- | src/mongo/db/query/find.cpp | 23 | ||||
-rw-r--r-- | src/mongo/db/query/find_common.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/query/find_common.h | 16 | ||||
-rw-r--r-- | src/mongo/db/query/plan_executor.cpp | 19 | ||||
-rw-r--r-- | src/mongo/db/query/plan_executor.h | 20 | ||||
-rw-r--r-- | src/mongo/s/query/cluster_find.cpp | 2 |
10 files changed, 163 insertions, 74 deletions
diff --git a/src/mongo/db/clientcursor.cpp b/src/mongo/db/clientcursor.cpp index 46817485eba..3b0f6b4d297 100644 --- a/src/mongo/db/clientcursor.cpp +++ b/src/mongo/db/clientcursor.cpp @@ -115,8 +115,8 @@ ClientCursor::~ClientCursor() { } } -void ClientCursor::markAsKilled(const std::string& reason) { - _exec->markAsKilled(reason); +void ClientCursor::markAsKilled(Status killStatus) { + _exec->markAsKilled(killStatus); } void ClientCursor::dispose(OperationContext* opCtx) { diff --git a/src/mongo/db/clientcursor.h b/src/mongo/db/clientcursor.h index f6a97bfe4c9..f0f8772df73 100644 --- a/src/mongo/db/clientcursor.h +++ b/src/mongo/db/clientcursor.h @@ -268,10 +268,10 @@ private: ~ClientCursor(); /** - * Marks this cursor as killed, so any future uses will return an error status including - * 'reason'. + * Marks this cursor as killed, so any future uses will return 'killStatus'. It is an error to + * call this method with Status::OK. */ - void markAsKilled(const std::string& reason); + void markAsKilled(Status killStatus); /** * Disposes this ClientCursor's PlanExecutor. Must be called before deleting a ClientCursor to diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp index 1e57c922448..901eaea3306 100644 --- a/src/mongo/db/commands/getmore_cmd.cpp +++ b/src/mongo/db/commands/getmore_cmd.cpp @@ -67,6 +67,37 @@ namespace { MONGO_FP_DECLARE(rsStopGetMoreCmd); +// Helper function which sets the 'msg' field of the opCtx's CurOp to the specified string, and +// returns the original value of the field. +std::string updateCurOpMsg(OperationContext* opCtx, const std::string& newMsg) { + stdx::lock_guard<Client> lk(*opCtx->getClient()); + auto oldMsg = CurOp::get(opCtx)->getMessage(); + CurOp::get(opCtx)->setMessage_inlock(newMsg.c_str()); + return oldMsg; +} + +// This helper function works much like MONGO_FAIL_POINT_PAUSE_WHILE_SET, but it additionally +// releases and re-acquires the collection readLock at regular intervals, in order to avoid +// deadlocks caused by the pinned-cursor failpoints in this file (see SERVER-21997). Finally, it +// also sets the 'msg' field of the opCtx's CurOp to the given string while the failpoint is active. +void waitWhileFailPointEnabled(FailPoint* failPoint, + OperationContext* opCtx, + const NamespaceString& nss, + const std::string& curOpMsg, + boost::optional<AutoGetCollectionForRead>* readLock) { + invariant(failPoint); + auto origCurOpMsg = updateCurOpMsg(opCtx, curOpMsg); + + while (MONGO_FAIL_POINT((*failPoint))) { + sleepFor(Milliseconds(10)); + if (readLock && *readLock) { + readLock->reset(); + readLock->emplace(opCtx, nss); + } + } + updateCurOpMsg(opCtx, origCurOpMsg); +} + /** * A command for running getMore() against an existing cursor registered with a CursorManager. * Used to generate the next batch of results for a ClientCursor. @@ -228,15 +259,15 @@ public: ClientCursor* cursor = ccPin.getValue().getCursor(); - // If the fail point is enabled, busy wait until it is disabled. - while (MONGO_FAIL_POINT(keepCursorPinnedDuringGetMore)) { - if (readLock) { - // We unlock and re-acquire the locks periodically in order to avoid deadlock (see - // SERVER-21997 for details). - sleepFor(Milliseconds(10)); - readLock.reset(); - readLock.emplace(opCtx, request.nss); - } + // If the 'waitAfterPinningCursorBeforeGetMoreBatch' fail point is enabled, set the 'msg' + // field of this operation's CurOp to signal that we've hit this point and then spin until + // the failpoint is released. + if (MONGO_FAIL_POINT(waitAfterPinningCursorBeforeGetMoreBatch)) { + waitWhileFailPointEnabled(&waitAfterPinningCursorBeforeGetMoreBatch, + opCtx, + request.nss, + "waitAfterPinningCursorBeforeGetMoreBatch", + &readLock); } // A user can only call getMore on their own cursor. If there were multiple users @@ -344,6 +375,18 @@ public: awaitDataState(opCtx).shouldWaitForInserts = true; } + // We're about to begin running the PlanExecutor in order to fill the getMore batch. If the + // 'waitWithPinnedCursorDuringGetMoreBatch' failpoint is active, set the 'msg' field of this + // operation's CurOp to signal that we've hit this point and then spin until the failpoint + // is released. + if (MONGO_FAIL_POINT(waitWithPinnedCursorDuringGetMoreBatch)) { + waitWhileFailPointEnabled(&waitWithPinnedCursorDuringGetMoreBatch, + opCtx, + request.nss, + "waitWithPinnedCursorDuringGetMoreBatch", + &readLock); + } + Status batchStatus = generateBatch(opCtx, cursor, request, &nextBatch, &state, &numResults); if (!batchStatus.isOK()) { return CommandHelpers::appendCommandStatus(result, batchStatus); @@ -388,6 +431,18 @@ public: cursorFreer.Dismiss(); } + // We're about to unpin the cursor as the ClientCursorPin goes out of scope (or delete it, + // if it has been exhausted). If the 'waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch' + // failpoint is active, set the 'msg' field of this operation's CurOp to signal that we've + // hit this point and then spin until the failpoint is released. + if (MONGO_FAIL_POINT(waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch)) { + waitWhileFailPointEnabled(&waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch, + opCtx, + request.nss, + "waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch", + &readLock); + } + return true; } @@ -451,28 +506,27 @@ public: return Status::OK(); } - if (PlanExecutor::FAILURE == *state) { - nextBatch->abandon(); - - error() << "GetMore command executor error: " << PlanExecutor::statestr(*state) - << ", stats: " << redact(Explain::getWinningPlanStats(exec)); - - return Status(ErrorCodes::OperationFailed, - str::stream() << "GetMore command executor error: " - << WorkingSetCommon::toStatusString(obj)); - } else if (PlanExecutor::DEAD == *state) { - nextBatch->abandon(); - - return Status(ErrorCodes::QueryPlanKilled, - str::stream() << "PlanExecutor killed: " - << WorkingSetCommon::toStatusString(obj)); - } else if (PlanExecutor::IS_EOF == *state) { - // This causes the reported latest oplog timestamp to advance even when there are - // no results for this particular query. - nextBatch->setLatestOplogTimestamp(exec->getLatestOplogTimestamp()); + switch (*state) { + case PlanExecutor::FAILURE: + // Log an error message and then perform the same cleanup as DEAD. + error() << "GetMore command executor error: " << PlanExecutor::statestr(*state) + << ", stats: " << redact(Explain::getWinningPlanStats(exec)); + case PlanExecutor::DEAD: { + nextBatch->abandon(); + // We should always have a valid status member object at this point. + auto status = WorkingSetCommon::getMemberObjectStatus(obj); + invariant(!status.isOK()); + return status; + } + case PlanExecutor::IS_EOF: + // This causes the reported latest oplog timestamp to advance even when there are + // no results for this particular query. + nextBatch->setLatestOplogTimestamp(exec->getLatestOplogTimestamp()); + default: + return Status::OK(); } - return Status::OK(); + MONGO_UNREACHABLE; } } getMoreCmd; diff --git a/src/mongo/db/cursor_manager.cpp b/src/mongo/db/cursor_manager.cpp index fac25953f3c..5168b638e0a 100644 --- a/src/mongo/db/cursor_manager.cpp +++ b/src/mongo/db/cursor_manager.cpp @@ -26,6 +26,8 @@ * it in the license file. */ +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery + #include "mongo/platform/basic.h" #include "mongo/db/cursor_manager.h" @@ -51,6 +53,7 @@ #include "mongo/platform/random.h" #include "mongo/stdx/memory.h" #include "mongo/util/exit.h" +#include "mongo/util/log.h" #include "mongo/util/startup_test.h" namespace mongo { @@ -420,7 +423,7 @@ void CursorManager::invalidateAll(OperationContext* opCtx, for (auto&& exec : partition) { // The PlanExecutor is owned elsewhere, so we just mark it as killed and let it be // cleaned up later. - exec->markAsKilled(reason); + exec->markAsKilled({ErrorCodes::QueryPlanKilled, reason}); } } allExecPartitions.clear(); @@ -431,7 +434,7 @@ void CursorManager::invalidateAll(OperationContext* opCtx, for (auto&& partition : allCurrentPartitions) { for (auto it = partition.begin(); it != partition.end();) { auto* cursor = it->second; - cursor->markAsKilled(reason); + cursor->markAsKilled({ErrorCodes::QueryPlanKilled, reason}); // If there's an operation actively using the cursor, then that operation is now // responsible for cleaning it up. Otherwise we can immediately dispose of it. @@ -541,9 +544,7 @@ StatusWith<ClientCursorPin> CursorManager::pinCursor(OperationContext* opCtx, !cursor->_operationUsingCursor); if (cursor->getExecutor()->isMarkedAsKilled()) { // This cursor was killed while it was idle. - Status error{ErrorCodes::QueryPlanKilled, - str::stream() << "cursor killed because: " - << cursor->getExecutor()->getKillReason()}; + Status error = cursor->getExecutor()->getKillStatus(); lockedPartition->erase(cursor->cursorid()); cursor->dispose(opCtx); delete cursor; @@ -572,10 +573,24 @@ void CursorManager::unpin(OperationContext* opCtx, ClientCursor* cursor) { // Avoid computing the current time within the critical section. auto now = opCtx->getServiceContext()->getPreciseClockSource()->now(); - auto partitionLock = _cursorMap->lockOnePartition(cursor->cursorid()); + auto partition = _cursorMap->lockOnePartition(cursor->cursorid()); invariant(cursor->_operationUsingCursor); + + // We must verify that no interrupts have occurred since we finished building the current + // batch. Otherwise, the cursor will be checked back in, the interrupted opCtx will be + // destroyed, and subsequent getMores with a fresh opCtx will succeed. + auto interruptStatus = cursor->_operationUsingCursor->checkForInterruptNoAssert(); cursor->_operationUsingCursor = nullptr; cursor->_lastUseDate = now; + if (!interruptStatus.isOK()) { + // If an interrupt occurred after the batch was completed, we remove the now-unpinned cursor + // from the CursorManager, then dispose of and delete it. + LOG(0) << "removing cursor " << cursor->cursorid() + << " after completing batch: " << interruptStatus; + partition->erase(cursor->cursorid()); + cursor->dispose(opCtx); + delete cursor; + } } void CursorManager::getCursorIds(std::set<CursorId>* openCursors) const { diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp index 6038193508e..75ba802b40f 100644 --- a/src/mongo/db/query/find.cpp +++ b/src/mongo/db/query/find.cpp @@ -202,14 +202,22 @@ void generateBatch(int ntoreturn, } // Propagate any errors to the caller. - if (PlanExecutor::FAILURE == *state) { - error() << "getMore executor error, stats: " << redact(Explain::getWinningPlanStats(exec)); - uasserted(17406, "getMore executor error: " + WorkingSetCommon::toStatusString(obj)); - } else if (PlanExecutor::DEAD == *state) { - uasserted(ErrorCodes::QueryPlanKilled, - str::stream() << "PlanExecutor killed: " - << WorkingSetCommon::toStatusString(obj)); + switch (*state) { + // Log an error message and then perform the same cleanup as DEAD. + case PlanExecutor::FAILURE: + error() << "getMore executor error, stats: " + << redact(Explain::getWinningPlanStats(exec)); + case PlanExecutor::DEAD: { + // We should always have a valid status object by this point. + auto status = WorkingSetCommon::getMemberObjectStatus(obj); + invariant(!status.isOK()); + uassertStatusOK(status); + } + default: + return; } + + MONGO_UNREACHABLE; } } // namespace @@ -324,7 +332,6 @@ Message getMore(OperationContext* opCtx, cursorid = 0; resultFlags = ResultFlag_CursorNotFound; } else { - invariant(ccPin == ErrorCodes::QueryPlanKilled || ccPin == ErrorCodes::Unauthorized); uassertStatusOK(ccPin.getStatus()); } } else { diff --git a/src/mongo/db/query/find_common.cpp b/src/mongo/db/query/find_common.cpp index b59b6e2a699..affea8b8967 100644 --- a/src/mongo/db/query/find_common.cpp +++ b/src/mongo/db/query/find_common.cpp @@ -36,10 +36,14 @@ namespace mongo { -MONGO_FP_DECLARE(keepCursorPinnedDuringGetMore); - MONGO_FP_DECLARE(disableAwaitDataForGetMoreCmd); +MONGO_FP_DECLARE(waitAfterPinningCursorBeforeGetMoreBatch); + +MONGO_FP_DECLARE(waitWithPinnedCursorDuringGetMoreBatch); + +MONGO_FP_DECLARE(waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch); + const OperationContext::Decoration<AwaitDataState> awaitDataState = OperationContext::declareDecoration<AwaitDataState>(); diff --git a/src/mongo/db/query/find_common.h b/src/mongo/db/query/find_common.h index 0e83e3cb546..6aaa8e390ff 100644 --- a/src/mongo/db/query/find_common.h +++ b/src/mongo/db/query/find_common.h @@ -54,14 +54,22 @@ extern const OperationContext::Decoration<AwaitDataState> awaitDataState; class BSONObj; class QueryRequest; -// Enabling this fail point will cause the getMore command to busy wait after pinning the cursor, -// until the fail point is disabled. -MONGO_FP_FORWARD_DECLARE(keepCursorPinnedDuringGetMore); - // Failpoint for making getMore not wait for an awaitdata cursor. Allows us to avoid waiting during // tests. MONGO_FP_FORWARD_DECLARE(disableAwaitDataForGetMoreCmd); +// Enabling this fail point will cause the getMore command to busy wait after pinning the cursor but +// before we have started building the batch, until the fail point is disabled. +MONGO_FP_FORWARD_DECLARE(waitAfterPinningCursorBeforeGetMoreBatch); + +// Enabling this fail point will cause the getMore command to busy wait with its cursor pinned while +// building the batch, until the fail point is disabled. +MONGO_FP_FORWARD_DECLARE(waitWithPinnedCursorDuringGetMoreBatch); + +// Enabling this failpoint will cause the getMore to wait just before it unpins its cursor after it +// has completed building the current batch. +MONGO_FP_FORWARD_DECLARE(waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch); + /** * Suite of find/getMore related functions used in both the mongod and mongos query paths. */ diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp index b3ad1aa08d0..a85210d0751 100644 --- a/src/mongo/db/query/plan_executor.cpp +++ b/src/mongo/db/query/plan_executor.cpp @@ -370,9 +370,7 @@ Status PlanExecutor::restoreStateWithoutRetrying() { } _currentState = kUsable; - return isMarkedAsKilled() - ? Status{ErrorCodes::QueryPlanKilled, "query killed during yield: " + *_killReason} - : Status::OK(); + return _killStatus; } void PlanExecutor::detachFromOperationContext() { @@ -501,10 +499,8 @@ PlanExecutor::ExecState PlanExecutor::getNextImpl(Snapshotted<BSONObj>* objOut, invariant(_currentState == kUsable); if (isMarkedAsKilled()) { if (NULL != objOut) { - Status status(ErrorCodes::OperationFailed, - str::stream() << "Operation aborted because: " << *_killReason); *objOut = Snapshotted<BSONObj>(SnapshotId(), - WorkingSetCommon::buildMemberStatusObject(status)); + WorkingSetCommon::buildMemberStatusObject(_killStatus)); } return PlanExecutor::DEAD; } @@ -646,8 +642,12 @@ bool PlanExecutor::isEOF() { return isMarkedAsKilled() || (_stash.empty() && _root->isEOF()); } -void PlanExecutor::markAsKilled(string reason) { - _killReason = std::move(reason); +void PlanExecutor::markAsKilled(Status killStatus) { + invariant(!killStatus.isOK()); + // If killed multiple times, only retain the first status. + if (_killStatus.isOK()) { + _killStatus = killStatus; + } } void PlanExecutor::dispose(OperationContext* opCtx, CursorManager* cursorManager) { @@ -678,8 +678,7 @@ Status PlanExecutor::executePlan() { if (PlanExecutor::DEAD == state || PlanExecutor::FAILURE == state) { if (isMarkedAsKilled()) { - return Status(ErrorCodes::QueryPlanKilled, - str::stream() << "Operation aborted because: " << *_killReason); + return _killStatus; } auto errorStatus = WorkingSetCommon::getMemberObjectStatus(obj); diff --git a/src/mongo/db/query/plan_executor.h b/src/mongo/db/query/plan_executor.h index 9ce62619e2d..182dfa168c5 100644 --- a/src/mongo/db/query/plan_executor.h +++ b/src/mongo/db/query/plan_executor.h @@ -374,10 +374,12 @@ public: /** * If we're yielding locks, the database we're operating over or any collection we're relying on * may be dropped. Plan executors are notified of such events by calling markAsKilled(). - * Callers must specify the 'reason' for why this executor is being killed. Subsequent calls to - * getNext() will return DEAD, and fill 'objOut' with an error detail including 'reason'. + * Callers must specify the reason for why this executor is being killed. Subsequent calls to + * getNext() will return DEAD, and fill 'objOut' with an error reflecting 'killStatus'. If this + * method is called multiple times, only the first 'killStatus' will be retained. It is an error + * to call this method with Status::OK. */ - void markAsKilled(std::string reason); + void markAsKilled(Status killStatus); /** * Cleans up any state associated with this PlanExecutor. Must be called before deleting this @@ -446,12 +448,12 @@ public: } bool isMarkedAsKilled() const { - return static_cast<bool>(_killReason); + return !_killStatus.isOK(); } - const std::string& getKillReason() { + Status getKillStatus() { invariant(isMarkedAsKilled()); - return *_killReason; + return _killStatus; } bool isDisposed() const { @@ -550,9 +552,9 @@ private: std::unique_ptr<QuerySolution> _qs; std::unique_ptr<PlanStage> _root; - // If _killReason has a value, then we have been killed and the value represents the reason for - // the kill. - boost::optional<std::string> _killReason; + // If _killStatus has a non-OK value, then we have been killed and the value represents the + // reason for the kill. + Status _killStatus = Status::OK(); // What namespace are we operating over? NamespaceString _nss; diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp index d8f27ea170b..1ce61880996 100644 --- a/src/mongo/s/query/cluster_find.cpp +++ b/src/mongo/s/query/cluster_find.cpp @@ -405,7 +405,7 @@ StatusWith<CursorResponse> ClusterFind::runGetMore(OperationContext* opCtx, invariant(request.cursorid == pinnedCursor.getValue().getCursorId()); // If the fail point is enabled, busy wait until it is disabled. - while (MONGO_FAIL_POINT(keepCursorPinnedDuringGetMore)) { + while (MONGO_FAIL_POINT(waitAfterPinningCursorBeforeGetMoreBatch)) { } if (auto readPref = pinnedCursor.getValue().getReadPreference()) { |