summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBernard Gorman <bernard.gorman@gmail.com>2018-02-02 16:02:30 +0000
committerBernard Gorman <bernard.gorman@gmail.com>2018-02-13 11:04:43 +0000
commit1221b80ee0d65879a4c76ff98f78e92b53766cc0 (patch)
treea6e4f81b3a82160ba3663daa4e4eaac07fad652e /src
parentca0a855dfc0f479d85b76a640b12a259c0547310 (diff)
downloadmongo-1221b80ee0d65879a4c76ff98f78e92b53766cc0.tar.gz
SERVER-32912 Ensure that killCursors always invalidates a pinned cursor
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/clientcursor.cpp4
-rw-r--r--src/mongo/db/clientcursor.h6
-rw-r--r--src/mongo/db/commands/getmore_cmd.cpp112
-rw-r--r--src/mongo/db/cursor_manager.cpp27
-rw-r--r--src/mongo/db/query/find.cpp23
-rw-r--r--src/mongo/db/query/find_common.cpp8
-rw-r--r--src/mongo/db/query/find_common.h16
-rw-r--r--src/mongo/db/query/plan_executor.cpp19
-rw-r--r--src/mongo/db/query/plan_executor.h20
-rw-r--r--src/mongo/s/query/cluster_find.cpp2
10 files changed, 163 insertions, 74 deletions
diff --git a/src/mongo/db/clientcursor.cpp b/src/mongo/db/clientcursor.cpp
index 46817485eba..3b0f6b4d297 100644
--- a/src/mongo/db/clientcursor.cpp
+++ b/src/mongo/db/clientcursor.cpp
@@ -115,8 +115,8 @@ ClientCursor::~ClientCursor() {
}
}
-void ClientCursor::markAsKilled(const std::string& reason) {
- _exec->markAsKilled(reason);
+void ClientCursor::markAsKilled(Status killStatus) {
+ _exec->markAsKilled(killStatus);
}
void ClientCursor::dispose(OperationContext* opCtx) {
diff --git a/src/mongo/db/clientcursor.h b/src/mongo/db/clientcursor.h
index f6a97bfe4c9..f0f8772df73 100644
--- a/src/mongo/db/clientcursor.h
+++ b/src/mongo/db/clientcursor.h
@@ -268,10 +268,10 @@ private:
~ClientCursor();
/**
- * Marks this cursor as killed, so any future uses will return an error status including
- * 'reason'.
+ * Marks this cursor as killed, so any future uses will return 'killStatus'. It is an error to
+ * call this method with Status::OK.
*/
- void markAsKilled(const std::string& reason);
+ void markAsKilled(Status killStatus);
/**
* Disposes this ClientCursor's PlanExecutor. Must be called before deleting a ClientCursor to
diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp
index 1e57c922448..901eaea3306 100644
--- a/src/mongo/db/commands/getmore_cmd.cpp
+++ b/src/mongo/db/commands/getmore_cmd.cpp
@@ -67,6 +67,37 @@ namespace {
MONGO_FP_DECLARE(rsStopGetMoreCmd);
+// Helper function which sets the 'msg' field of the opCtx's CurOp to the specified string, and
+// returns the original value of the field.
+std::string updateCurOpMsg(OperationContext* opCtx, const std::string& newMsg) {
+ stdx::lock_guard<Client> lk(*opCtx->getClient());
+ auto oldMsg = CurOp::get(opCtx)->getMessage();
+ CurOp::get(opCtx)->setMessage_inlock(newMsg.c_str());
+ return oldMsg;
+}
+
+// This helper function works much like MONGO_FAIL_POINT_PAUSE_WHILE_SET, but it additionally
+// releases and re-acquires the collection readLock at regular intervals, in order to avoid
+// deadlocks caused by the pinned-cursor failpoints in this file (see SERVER-21997). Finally, it
+// also sets the 'msg' field of the opCtx's CurOp to the given string while the failpoint is active.
+void waitWhileFailPointEnabled(FailPoint* failPoint,
+ OperationContext* opCtx,
+ const NamespaceString& nss,
+ const std::string& curOpMsg,
+ boost::optional<AutoGetCollectionForRead>* readLock) {
+ invariant(failPoint);
+ auto origCurOpMsg = updateCurOpMsg(opCtx, curOpMsg);
+
+ while (MONGO_FAIL_POINT((*failPoint))) {
+ sleepFor(Milliseconds(10));
+ if (readLock && *readLock) {
+ readLock->reset();
+ readLock->emplace(opCtx, nss);
+ }
+ }
+ updateCurOpMsg(opCtx, origCurOpMsg);
+}
+
/**
* A command for running getMore() against an existing cursor registered with a CursorManager.
* Used to generate the next batch of results for a ClientCursor.
@@ -228,15 +259,15 @@ public:
ClientCursor* cursor = ccPin.getValue().getCursor();
- // If the fail point is enabled, busy wait until it is disabled.
- while (MONGO_FAIL_POINT(keepCursorPinnedDuringGetMore)) {
- if (readLock) {
- // We unlock and re-acquire the locks periodically in order to avoid deadlock (see
- // SERVER-21997 for details).
- sleepFor(Milliseconds(10));
- readLock.reset();
- readLock.emplace(opCtx, request.nss);
- }
+ // If the 'waitAfterPinningCursorBeforeGetMoreBatch' fail point is enabled, set the 'msg'
+ // field of this operation's CurOp to signal that we've hit this point and then spin until
+ // the failpoint is released.
+ if (MONGO_FAIL_POINT(waitAfterPinningCursorBeforeGetMoreBatch)) {
+ waitWhileFailPointEnabled(&waitAfterPinningCursorBeforeGetMoreBatch,
+ opCtx,
+ request.nss,
+ "waitAfterPinningCursorBeforeGetMoreBatch",
+ &readLock);
}
// A user can only call getMore on their own cursor. If there were multiple users
@@ -344,6 +375,18 @@ public:
awaitDataState(opCtx).shouldWaitForInserts = true;
}
+ // We're about to begin running the PlanExecutor in order to fill the getMore batch. If the
+ // 'waitWithPinnedCursorDuringGetMoreBatch' failpoint is active, set the 'msg' field of this
+ // operation's CurOp to signal that we've hit this point and then spin until the failpoint
+ // is released.
+ if (MONGO_FAIL_POINT(waitWithPinnedCursorDuringGetMoreBatch)) {
+ waitWhileFailPointEnabled(&waitWithPinnedCursorDuringGetMoreBatch,
+ opCtx,
+ request.nss,
+ "waitWithPinnedCursorDuringGetMoreBatch",
+ &readLock);
+ }
+
Status batchStatus = generateBatch(opCtx, cursor, request, &nextBatch, &state, &numResults);
if (!batchStatus.isOK()) {
return CommandHelpers::appendCommandStatus(result, batchStatus);
@@ -388,6 +431,18 @@ public:
cursorFreer.Dismiss();
}
+ // We're about to unpin the cursor as the ClientCursorPin goes out of scope (or delete it,
+ // if it has been exhausted). If the 'waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch'
+ // failpoint is active, set the 'msg' field of this operation's CurOp to signal that we've
+ // hit this point and then spin until the failpoint is released.
+ if (MONGO_FAIL_POINT(waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch)) {
+ waitWhileFailPointEnabled(&waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch,
+ opCtx,
+ request.nss,
+ "waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch",
+ &readLock);
+ }
+
return true;
}
@@ -451,28 +506,27 @@ public:
return Status::OK();
}
- if (PlanExecutor::FAILURE == *state) {
- nextBatch->abandon();
-
- error() << "GetMore command executor error: " << PlanExecutor::statestr(*state)
- << ", stats: " << redact(Explain::getWinningPlanStats(exec));
-
- return Status(ErrorCodes::OperationFailed,
- str::stream() << "GetMore command executor error: "
- << WorkingSetCommon::toStatusString(obj));
- } else if (PlanExecutor::DEAD == *state) {
- nextBatch->abandon();
-
- return Status(ErrorCodes::QueryPlanKilled,
- str::stream() << "PlanExecutor killed: "
- << WorkingSetCommon::toStatusString(obj));
- } else if (PlanExecutor::IS_EOF == *state) {
- // This causes the reported latest oplog timestamp to advance even when there are
- // no results for this particular query.
- nextBatch->setLatestOplogTimestamp(exec->getLatestOplogTimestamp());
+ switch (*state) {
+ case PlanExecutor::FAILURE:
+ // Log an error message and then perform the same cleanup as DEAD.
+ error() << "GetMore command executor error: " << PlanExecutor::statestr(*state)
+ << ", stats: " << redact(Explain::getWinningPlanStats(exec));
+ case PlanExecutor::DEAD: {
+ nextBatch->abandon();
+ // We should always have a valid status member object at this point.
+ auto status = WorkingSetCommon::getMemberObjectStatus(obj);
+ invariant(!status.isOK());
+ return status;
+ }
+ case PlanExecutor::IS_EOF:
+ // This causes the reported latest oplog timestamp to advance even when there are
+ // no results for this particular query.
+ nextBatch->setLatestOplogTimestamp(exec->getLatestOplogTimestamp());
+ default:
+ return Status::OK();
}
- return Status::OK();
+ MONGO_UNREACHABLE;
}
} getMoreCmd;
diff --git a/src/mongo/db/cursor_manager.cpp b/src/mongo/db/cursor_manager.cpp
index fac25953f3c..5168b638e0a 100644
--- a/src/mongo/db/cursor_manager.cpp
+++ b/src/mongo/db/cursor_manager.cpp
@@ -26,6 +26,8 @@
* it in the license file.
*/
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery
+
#include "mongo/platform/basic.h"
#include "mongo/db/cursor_manager.h"
@@ -51,6 +53,7 @@
#include "mongo/platform/random.h"
#include "mongo/stdx/memory.h"
#include "mongo/util/exit.h"
+#include "mongo/util/log.h"
#include "mongo/util/startup_test.h"
namespace mongo {
@@ -420,7 +423,7 @@ void CursorManager::invalidateAll(OperationContext* opCtx,
for (auto&& exec : partition) {
// The PlanExecutor is owned elsewhere, so we just mark it as killed and let it be
// cleaned up later.
- exec->markAsKilled(reason);
+ exec->markAsKilled({ErrorCodes::QueryPlanKilled, reason});
}
}
allExecPartitions.clear();
@@ -431,7 +434,7 @@ void CursorManager::invalidateAll(OperationContext* opCtx,
for (auto&& partition : allCurrentPartitions) {
for (auto it = partition.begin(); it != partition.end();) {
auto* cursor = it->second;
- cursor->markAsKilled(reason);
+ cursor->markAsKilled({ErrorCodes::QueryPlanKilled, reason});
// If there's an operation actively using the cursor, then that operation is now
// responsible for cleaning it up. Otherwise we can immediately dispose of it.
@@ -541,9 +544,7 @@ StatusWith<ClientCursorPin> CursorManager::pinCursor(OperationContext* opCtx,
!cursor->_operationUsingCursor);
if (cursor->getExecutor()->isMarkedAsKilled()) {
// This cursor was killed while it was idle.
- Status error{ErrorCodes::QueryPlanKilled,
- str::stream() << "cursor killed because: "
- << cursor->getExecutor()->getKillReason()};
+ Status error = cursor->getExecutor()->getKillStatus();
lockedPartition->erase(cursor->cursorid());
cursor->dispose(opCtx);
delete cursor;
@@ -572,10 +573,24 @@ void CursorManager::unpin(OperationContext* opCtx, ClientCursor* cursor) {
// Avoid computing the current time within the critical section.
auto now = opCtx->getServiceContext()->getPreciseClockSource()->now();
- auto partitionLock = _cursorMap->lockOnePartition(cursor->cursorid());
+ auto partition = _cursorMap->lockOnePartition(cursor->cursorid());
invariant(cursor->_operationUsingCursor);
+
+ // We must verify that no interrupts have occurred since we finished building the current
+ // batch. Otherwise, the cursor will be checked back in, the interrupted opCtx will be
+ // destroyed, and subsequent getMores with a fresh opCtx will succeed.
+ auto interruptStatus = cursor->_operationUsingCursor->checkForInterruptNoAssert();
cursor->_operationUsingCursor = nullptr;
cursor->_lastUseDate = now;
+ if (!interruptStatus.isOK()) {
+ // If an interrupt occurred after the batch was completed, we remove the now-unpinned cursor
+ // from the CursorManager, then dispose of and delete it.
+ LOG(0) << "removing cursor " << cursor->cursorid()
+ << " after completing batch: " << interruptStatus;
+ partition->erase(cursor->cursorid());
+ cursor->dispose(opCtx);
+ delete cursor;
+ }
}
void CursorManager::getCursorIds(std::set<CursorId>* openCursors) const {
diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp
index 6038193508e..75ba802b40f 100644
--- a/src/mongo/db/query/find.cpp
+++ b/src/mongo/db/query/find.cpp
@@ -202,14 +202,22 @@ void generateBatch(int ntoreturn,
}
// Propagate any errors to the caller.
- if (PlanExecutor::FAILURE == *state) {
- error() << "getMore executor error, stats: " << redact(Explain::getWinningPlanStats(exec));
- uasserted(17406, "getMore executor error: " + WorkingSetCommon::toStatusString(obj));
- } else if (PlanExecutor::DEAD == *state) {
- uasserted(ErrorCodes::QueryPlanKilled,
- str::stream() << "PlanExecutor killed: "
- << WorkingSetCommon::toStatusString(obj));
+ switch (*state) {
+ // Log an error message and then perform the same cleanup as DEAD.
+ case PlanExecutor::FAILURE:
+ error() << "getMore executor error, stats: "
+ << redact(Explain::getWinningPlanStats(exec));
+ case PlanExecutor::DEAD: {
+ // We should always have a valid status object by this point.
+ auto status = WorkingSetCommon::getMemberObjectStatus(obj);
+ invariant(!status.isOK());
+ uassertStatusOK(status);
+ }
+ default:
+ return;
}
+
+ MONGO_UNREACHABLE;
}
} // namespace
@@ -324,7 +332,6 @@ Message getMore(OperationContext* opCtx,
cursorid = 0;
resultFlags = ResultFlag_CursorNotFound;
} else {
- invariant(ccPin == ErrorCodes::QueryPlanKilled || ccPin == ErrorCodes::Unauthorized);
uassertStatusOK(ccPin.getStatus());
}
} else {
diff --git a/src/mongo/db/query/find_common.cpp b/src/mongo/db/query/find_common.cpp
index b59b6e2a699..affea8b8967 100644
--- a/src/mongo/db/query/find_common.cpp
+++ b/src/mongo/db/query/find_common.cpp
@@ -36,10 +36,14 @@
namespace mongo {
-MONGO_FP_DECLARE(keepCursorPinnedDuringGetMore);
-
MONGO_FP_DECLARE(disableAwaitDataForGetMoreCmd);
+MONGO_FP_DECLARE(waitAfterPinningCursorBeforeGetMoreBatch);
+
+MONGO_FP_DECLARE(waitWithPinnedCursorDuringGetMoreBatch);
+
+MONGO_FP_DECLARE(waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch);
+
const OperationContext::Decoration<AwaitDataState> awaitDataState =
OperationContext::declareDecoration<AwaitDataState>();
diff --git a/src/mongo/db/query/find_common.h b/src/mongo/db/query/find_common.h
index 0e83e3cb546..6aaa8e390ff 100644
--- a/src/mongo/db/query/find_common.h
+++ b/src/mongo/db/query/find_common.h
@@ -54,14 +54,22 @@ extern const OperationContext::Decoration<AwaitDataState> awaitDataState;
class BSONObj;
class QueryRequest;
-// Enabling this fail point will cause the getMore command to busy wait after pinning the cursor,
-// until the fail point is disabled.
-MONGO_FP_FORWARD_DECLARE(keepCursorPinnedDuringGetMore);
-
// Failpoint for making getMore not wait for an awaitdata cursor. Allows us to avoid waiting during
// tests.
MONGO_FP_FORWARD_DECLARE(disableAwaitDataForGetMoreCmd);
+// Enabling this fail point will cause the getMore command to busy wait after pinning the cursor but
+// before we have started building the batch, until the fail point is disabled.
+MONGO_FP_FORWARD_DECLARE(waitAfterPinningCursorBeforeGetMoreBatch);
+
+// Enabling this fail point will cause the getMore command to busy wait with its cursor pinned while
+// building the batch, until the fail point is disabled.
+MONGO_FP_FORWARD_DECLARE(waitWithPinnedCursorDuringGetMoreBatch);
+
+// Enabling this failpoint will cause the getMore to wait just before it unpins its cursor after it
+// has completed building the current batch.
+MONGO_FP_FORWARD_DECLARE(waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch);
+
/**
* Suite of find/getMore related functions used in both the mongod and mongos query paths.
*/
diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp
index b3ad1aa08d0..a85210d0751 100644
--- a/src/mongo/db/query/plan_executor.cpp
+++ b/src/mongo/db/query/plan_executor.cpp
@@ -370,9 +370,7 @@ Status PlanExecutor::restoreStateWithoutRetrying() {
}
_currentState = kUsable;
- return isMarkedAsKilled()
- ? Status{ErrorCodes::QueryPlanKilled, "query killed during yield: " + *_killReason}
- : Status::OK();
+ return _killStatus;
}
void PlanExecutor::detachFromOperationContext() {
@@ -501,10 +499,8 @@ PlanExecutor::ExecState PlanExecutor::getNextImpl(Snapshotted<BSONObj>* objOut,
invariant(_currentState == kUsable);
if (isMarkedAsKilled()) {
if (NULL != objOut) {
- Status status(ErrorCodes::OperationFailed,
- str::stream() << "Operation aborted because: " << *_killReason);
*objOut = Snapshotted<BSONObj>(SnapshotId(),
- WorkingSetCommon::buildMemberStatusObject(status));
+ WorkingSetCommon::buildMemberStatusObject(_killStatus));
}
return PlanExecutor::DEAD;
}
@@ -646,8 +642,12 @@ bool PlanExecutor::isEOF() {
return isMarkedAsKilled() || (_stash.empty() && _root->isEOF());
}
-void PlanExecutor::markAsKilled(string reason) {
- _killReason = std::move(reason);
+void PlanExecutor::markAsKilled(Status killStatus) {
+ invariant(!killStatus.isOK());
+ // If killed multiple times, only retain the first status.
+ if (_killStatus.isOK()) {
+ _killStatus = killStatus;
+ }
}
void PlanExecutor::dispose(OperationContext* opCtx, CursorManager* cursorManager) {
@@ -678,8 +678,7 @@ Status PlanExecutor::executePlan() {
if (PlanExecutor::DEAD == state || PlanExecutor::FAILURE == state) {
if (isMarkedAsKilled()) {
- return Status(ErrorCodes::QueryPlanKilled,
- str::stream() << "Operation aborted because: " << *_killReason);
+ return _killStatus;
}
auto errorStatus = WorkingSetCommon::getMemberObjectStatus(obj);
diff --git a/src/mongo/db/query/plan_executor.h b/src/mongo/db/query/plan_executor.h
index 9ce62619e2d..182dfa168c5 100644
--- a/src/mongo/db/query/plan_executor.h
+++ b/src/mongo/db/query/plan_executor.h
@@ -374,10 +374,12 @@ public:
/**
* If we're yielding locks, the database we're operating over or any collection we're relying on
* may be dropped. Plan executors are notified of such events by calling markAsKilled().
- * Callers must specify the 'reason' for why this executor is being killed. Subsequent calls to
- * getNext() will return DEAD, and fill 'objOut' with an error detail including 'reason'.
+ * Callers must specify the reason for why this executor is being killed. Subsequent calls to
+ * getNext() will return DEAD, and fill 'objOut' with an error reflecting 'killStatus'. If this
+ * method is called multiple times, only the first 'killStatus' will be retained. It is an error
+ * to call this method with Status::OK.
*/
- void markAsKilled(std::string reason);
+ void markAsKilled(Status killStatus);
/**
* Cleans up any state associated with this PlanExecutor. Must be called before deleting this
@@ -446,12 +448,12 @@ public:
}
bool isMarkedAsKilled() const {
- return static_cast<bool>(_killReason);
+ return !_killStatus.isOK();
}
- const std::string& getKillReason() {
+ Status getKillStatus() {
invariant(isMarkedAsKilled());
- return *_killReason;
+ return _killStatus;
}
bool isDisposed() const {
@@ -550,9 +552,9 @@ private:
std::unique_ptr<QuerySolution> _qs;
std::unique_ptr<PlanStage> _root;
- // If _killReason has a value, then we have been killed and the value represents the reason for
- // the kill.
- boost::optional<std::string> _killReason;
+ // If _killStatus has a non-OK value, then we have been killed and the value represents the
+ // reason for the kill.
+ Status _killStatus = Status::OK();
// What namespace are we operating over?
NamespaceString _nss;
diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp
index d8f27ea170b..1ce61880996 100644
--- a/src/mongo/s/query/cluster_find.cpp
+++ b/src/mongo/s/query/cluster_find.cpp
@@ -405,7 +405,7 @@ StatusWith<CursorResponse> ClusterFind::runGetMore(OperationContext* opCtx,
invariant(request.cursorid == pinnedCursor.getValue().getCursorId());
// If the fail point is enabled, busy wait until it is disabled.
- while (MONGO_FAIL_POINT(keepCursorPinnedDuringGetMore)) {
+ while (MONGO_FAIL_POINT(waitAfterPinningCursorBeforeGetMoreBatch)) {
}
if (auto readPref = pinnedCursor.getValue().getReadPreference()) {