SERVER-39149 Homogenize getMore behavior

Cursor's have subtly different semantics across mongos and mongod and between legacy getMore and command getMore. (as does the find command) Ensuring that all getMores can be paused by waitAfterPinningCursorBeforeGetMoreBatch, and that cursors are cleaned up if they are killed after verifying auth, makes testing a bit simpler and cursors less leaky And adding a check for waitInFindBeforeMakingBatch to runQuery similarly makes it easier to test
author: Jason Carey <jcarey@argv.me> 2019-01-23 15:52:32 -0500
committer: Jason Carey <jcarey@argv.me> 2019-02-07 18:01:42 -0500
commit: 4d703e26c2801971d538f948a4dc3191994f0074 (patch)
tree: b01a03e75298bb277e12febc35f067124a93b65a /src/mongo/db/query
parent: 1c61ba3c31850721b73df871feb0f38f217a61a1 (diff)
download: mongo-4d703e26c2801971d538f948a4dc3191994f0074.tar.gz
3 files changed, 54 insertions, 7 deletions
diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp
index da3ff98fb0c..97905e0cdda 100644
--- a/src/mongo/db/query/find.cpp
+++ b/src/mongo/db/query/find.cpp
@@ -353,6 +353,31 @@ Message getMore(OperationContext* opCtx,
 
     *isCursorAuthorized = true;
 
+    // Only used by the failpoints.
+    const auto dropAndReaquireReadLock = [&] {
+        // Make sure an interrupted operation does not prevent us from reacquiring the lock.
+        UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
+        readLock.reset();
+        readLock.emplace(opCtx, NamespaceString(ns));
+    };
+
+    // On early return, get rid of the cursor.
+    auto cursorFreer = makeGuard([&] { cursorPin.deleteUnderlying(); });
+
+    // If the 'waitAfterPinningCursorBeforeGetMoreBatch' fail point is enabled, set the
+    // 'msg' field of this operation's CurOp to signal that we've hit this point and then
+    // repeatedly release and re-acquire the collection readLock at regular intervals until
+    // the failpoint is released. This is done in order to avoid deadlocks caused by the
+    // pinned-cursor failpoints in this file (see SERVER-21997).
+    if (MONGO_FAIL_POINT(waitAfterPinningCursorBeforeGetMoreBatch)) {
+        CurOpFailpointHelpers::waitWhileFailPointEnabled(&waitAfterPinningCursorBeforeGetMoreBatch,
+                                                         opCtx,
+                                                         "waitAfterPinningCursorBeforeGetMoreBatch",
+                                                         dropAndReaquireReadLock);
+    }
+
+
     const auto replicationMode = repl::ReplicationCoordinator::get(opCtx)->getReplicationMode();
 
     if (replicationMode == repl::ReplicationCoordinator::modeReplSet &&
@@ -421,9 +446,11 @@ Message getMore(OperationContext* opCtx,
     // accumulate over the course of a cursor's lifetime.
     PlanSummaryStats preExecutionStats;
     Explain::getSummaryStats(*exec, &preExecutionStats);
-    if (MONGO_FAIL_POINT(legacyGetMoreWaitWithCursor)) {
-        CurOpFailpointHelpers::waitWhileFailPointEnabled(
-            &legacyGetMoreWaitWithCursor, opCtx, "legacyGetMoreWaitWithCursor", nullptr);
+    if (MONGO_FAIL_POINT(waitWithPinnedCursorDuringGetMoreBatch)) {
+        CurOpFailpointHelpers::waitWhileFailPointEnabled(&waitWithPinnedCursorDuringGetMoreBatch,
+                                                         opCtx,
+                                                         "waitWithPinnedCursorDuringGetMoreBatch",
+                                                         nullptr);
     }
 
     generateBatch(ntoreturn, cursorPin.getCursor(), &bb, &numResults, &state);
@@ -478,8 +505,6 @@ Message getMore(OperationContext* opCtx,
     // ClientCursorPin is declared after our lock is declared, this will happen under the lock if
     // any locking was necessary.
     if (!shouldSaveCursorGetMore(state, exec, cursorPin->isTailable())) {
-        cursorPin.deleteUnderlying();
-
         // cc is now invalid, as is the executor
         cursorid = 0;
         curOp.debug().cursorExhausted = true;
@@ -487,6 +512,7 @@ Message getMore(OperationContext* opCtx,
         LOG(5) << "getMore NOT saving client cursor, ended with state "
                << PlanExecutor::statestr(state);
     } else {
+        cursorFreer.dismiss();
         // Continue caching the ClientCursor.
         cursorPin->incNReturnedSoFar(numResults);
         cursorPin->incNBatches();
@@ -505,6 +531,18 @@ Message getMore(OperationContext* opCtx,
         }
     }
 
+    // We're about to unpin or delete the cursor as the ClientCursorPin goes out of scope.
+    // If the 'waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch' failpoint is active, we
+    // set the 'msg' field of this operation's CurOp to signal that we've hit this point and
+    // then spin until the failpoint is released.
+    if (MONGO_FAIL_POINT(waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch)) {
+        CurOpFailpointHelpers::waitWhileFailPointEnabled(
+            &waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch,
+            opCtx,
+            "waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch",
+            dropAndReaquireReadLock);
+    }
+
     QueryResult::View qr = bb.buf();
     qr.msgdata().setLen(bb.len());
     qr.msgdata().setOperation(opReply);
@@ -604,6 +642,9 @@ std::string runQuery(OperationContext* opCtx,
     }
     opCtx->checkForInterrupt();  // May trigger maxTimeAlwaysTimeOut fail point.
 
+    CurOpFailpointHelpers::waitWhileFailPointEnabled(
+        &waitInFindBeforeMakingBatch, opCtx, "waitInFindBeforeMakingBatch");
+
     // Run the query.
     // bb is used to hold query results
     // this buffer should contain either requested documents per query or
diff --git a/src/mongo/db/query/find_common.cpp b/src/mongo/db/query/find_common.cpp
index e86695959ab..e74d9594a9c 100644
--- a/src/mongo/db/query/find_common.cpp
+++ b/src/mongo/db/query/find_common.cpp
@@ -45,6 +45,8 @@ MONGO_FAIL_POINT_DEFINE(disableAwaitDataForGetMoreCmd);
 
 MONGO_FAIL_POINT_DEFINE(waitAfterPinningCursorBeforeGetMoreBatch);
 
+MONGO_FAIL_POINT_DEFINE(waitWithPinnedCursorDuringGetMoreBatch);
+
 MONGO_FAIL_POINT_DEFINE(waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch);
 
 const OperationContext::Decoration<AwaitDataState> awaitDataState =
diff --git a/src/mongo/db/query/find_common.h b/src/mongo/db/query/find_common.h
index 4dfa751a110..ca4111dd754 100644
--- a/src/mongo/db/query/find_common.h
+++ b/src/mongo/db/query/find_common.h
@@ -63,11 +63,15 @@ MONGO_FAIL_POINT_DECLARE(waitInFindBeforeMakingBatch);
 // tests.
 MONGO_FAIL_POINT_DECLARE(disableAwaitDataForGetMoreCmd);
 
-// Enabling this fail point will cause the getMore command to busy wait after pinning the cursor
+// Enabling this fail point will cause getMores to busy wait after pinning the cursor
 // but before we have started building the batch, until the fail point is disabled.
 MONGO_FAIL_POINT_DECLARE(waitAfterPinningCursorBeforeGetMoreBatch);
 
-// Enabling this failpoint will cause the getMore to wait just before it unpins its cursor after it
+// Enabling this fail point will cause getMores to busy wait after setting up the plan executor and
+// before beginning the batch.
+MONGO_FAIL_POINT_DECLARE(waitWithPinnedCursorDuringGetMoreBatch);
+
+// Enabling this failpoint will cause getMores to wait just before it unpins its cursor after it
 // has completed building the current batch.
 MONGO_FAIL_POINT_DECLARE(waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch);
author	Jason Carey <jcarey@argv.me>	2019-01-23 15:52:32 -0500
committer	Jason Carey <jcarey@argv.me>	2019-02-07 18:01:42 -0500
commit	4d703e26c2801971d538f948a4dc3191994f0074 (patch)
tree	b01a03e75298bb277e12febc35f067124a93b65a /src/mongo/db/query
parent	1c61ba3c31850721b73df871feb0f38f217a61a1 (diff)
download	mongo-4d703e26c2801971d538f948a4dc3191994f0074.tar.gz