SERVER-62710 Ensure that AsyncResultsMerger attempts to kill shard cursors when maxTimeMs is exceeded

author: Romans Kasperovics <romans.kasperovics@mongodb.com> 2022-03-15 08:55:37 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-03-15 09:33:32 +0000
commit: 80421c5b8e5ac71e16dc005fd961901884891c47 (patch)
tree: f10f3369dd60df12bea2c4e7060793061d4c7486
parent: abe5428751586d14241f94f06261c7037690557f (diff)
download: mongo-80421c5b8e5ac71e16dc005fd961901884891c47.tar.gz
5 files changed, 161 insertions, 53 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index bf463998461..ef036616336 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -152,6 +152,8 @@ last-continuous:
       test_file: jstests/aggregation/lookup_let_optimization.js
     - ticket: SERVER-63129
       test_file: jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js
+    - ticket: SERVER-62710
+      test_file: jstests/sharding/max_time_ms_does_not_leak_shard_cursor.js
     - ticket: SERVER-62759
       test_file: jstests/replsets/apply_ops_dropDatabase.js
     - ticket: SERVER-63732
@@ -447,6 +449,8 @@ last-lts:
       test_file: jstests/aggregation/lookup_let_optimization.js
     - ticket: SERVER-63129
       test_file: jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js
+    - ticket: SERVER-62710
+      test_file: jstests/sharding/max_time_ms_does_not_leak_shard_cursor.js
     - ticket: SERVER-62759
       test_file: jstests/replsets/apply_ops_dropDatabase.js
     - ticket: SERVER-63732
diff --git a/jstests/sharding/max_time_ms_does_not_leak_shard_cursor.js b/jstests/sharding/max_time_ms_does_not_leak_shard_cursor.js
new file mode 100644
index 00000000000..0c0d67b9554
--- /dev/null
+++ b/jstests/sharding/max_time_ms_does_not_leak_shard_cursor.js
@@ -0,0 +1,75 @@
+// Tests that if a mongoS cursor exceeds the maxTimeMs timeout, the cursors on the shards will be
+// cleaned up. Exercises the fix for the bug described in SERVER-62710.
+//
+// @tags: []
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");  // for 'configureFailPoint()'
+
+function getIdleCursors(conn, collName) {
+    return conn.getDB('admin')
+        .aggregate([
+            {$currentOp: {idleCursors: true}},
+            {$match: {$and: [{type: "idleCursor"}, {"cursor.originatingCommand.find": collName}]}}
+        ])
+        .toArray();
+}
+
+function assertNoIdleCursors(conn, collName) {
+    const sleepTimeMS = 10 * 1000;
+    const retries = 2;
+    assert.soon(() => {
+        return getIdleCursors(conn, collName).length === 0;
+    }, tojson(getIdleCursors(conn, collName)), retries * sleepTimeMS, sleepTimeMS, {
+        runHangAnalyzer: false
+    });
+}
+
+const st = new ShardingTest({shards: 1, mongos: 1, config: 1});
+
+const dbName = "test";
+const collName = jsTestName();
+
+const coll = st.s.getCollection(dbName + "." + collName);
+
+assert.commandWorked(coll.insert(Array.from({length: 1000}, _ => ({a: 1}))));
+
+// Perform a query that sleeps after retrieving each document.
+// This is guaranteed to exceed the specified maxTimeMS limit.
+// The timeout may happen either on mongoS or on shard.
+const curs = coll.find({
+                     $where: function() {
+                         sleep(1);
+                         return true;
+                     }
+                 })
+                 .batchSize(2)
+                 .maxTimeMS(100);
+assert.eq(getIdleCursors(st.shard0, collName).length, 0);
+assert.throwsWithCode(() => {
+    curs.itcount();
+}, ErrorCodes.MaxTimeMSExpired);
+assertNoIdleCursors(st.shard0, collName);
+
+// Ensure the timeout happens on mongoS.
+const cursTestMongoS = coll.find({}).batchSize(2).maxTimeMS(100);
+const fpTestMongoS = configureFailPoint(st.s, "maxTimeAlwaysTimeOut", {}, "alwaysOn");
+assert.throwsWithCode(() => {
+    cursTestMongoS.itcount();
+}, ErrorCodes.MaxTimeMSExpired);
+fpTestMongoS.off();
+assertNoIdleCursors(st.shard0, collName);
+
+// Ensure the timeout happens on the shard.
+const cursTestShard0 = coll.find({}).batchSize(2).maxTimeMS(100);
+const fpTestShard0 = configureFailPoint(st.shard0, "maxTimeAlwaysTimeOut", {}, "alwaysOn");
+assert.throwsWithCode(() => {
+    cursTestShard0.itcount();
+}, ErrorCodes.MaxTimeMSExpired);
+fpTestShard0.off();
+assertNoIdleCursors(st.shard0, collName);
+
+st.stop();
+})();
diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp
index d78bc4e2434..9fe8b68b918 100644
--- a/src/mongo/db/commands/getmore_cmd.cpp
+++ b/src/mongo/db/commands/getmore_cmd.cpp
@@ -144,6 +144,57 @@ void validateTxnNumber(OperationContext* opCtx, int64_t cursorId, const ClientCu
 }
 
 /**
+ * Validate that the client has necessary privileges to call getMore on the given cursor.
+ */
+void validateAuthorization(const OperationContext* opCtx, const ClientCursor& cursor) {
+
+    auto authzSession = AuthorizationSession::get(opCtx->getClient());
+    // A user can only call getMore on their own cursor. If there were multiple users
+    // authenticated when the cursor was created, then at least one of them must be
+    // authenticated in order to run getMore on the cursor.
+    if (!authzSession->isCoauthorizedWith(cursor.getAuthenticatedUsers())) {
+        uasserted(ErrorCodes::Unauthorized,
+                  str::stream() << "cursor id " << cursor.cursorid()
+                                << " was not created by the authenticated user");
+    }
+
+    // Ensure that the client still has the privileges to run the originating command.
+    if (!authzSession->isAuthorizedForPrivileges(cursor.getOriginatingPrivileges())) {
+        uasserted(ErrorCodes::Unauthorized,
+                  str::stream() << "not authorized for getMore with cursor id "
+                                << cursor.cursorid());
+    }
+}
+
+/**
+ * Validate that the command's and cursor's namespaces match.
+ */
+void validateNamespace(const NamespaceString& commandNss, const ClientCursor& cursor) {
+    uassert(ErrorCodes::Unauthorized,
+            str::stream() << "Requested getMore on namespace '" << commandNss.ns()
+                          << "', but cursor belongs to a different namespace " << cursor.nss().ns(),
+            commandNss == cursor.nss());
+
+    if (commandNss.isOplog() && MONGO_unlikely(rsStopGetMoreCmd.shouldFail())) {
+        uasserted(ErrorCodes::CommandFailed,
+                  str::stream() << "getMore on " << commandNss.ns()
+                                << " rejected due to active fail point rsStopGetMoreCmd");
+    }
+}
+
+/**
+ * Validate that the command's maxTimeMS is only set when the cursor is in awaitData mode.
+ */
+void validateMaxTimeMS(const boost::optional<std::int64_t>& commandMaxTimeMS,
+                       const ClientCursor& cursor) {
+    if (commandMaxTimeMS.has_value()) {
+        uassert(ErrorCodes::BadValue,
+                "cannot set maxTimeMS on getMore command for a non-awaitData cursor",
+                cursor.isAwaitData());
+    }
+}
+
+/**
  * Apply the read concern from the cursor to this operation.
  */
 void applyCursorReadConcern(OperationContext* opCtx, repl::ReadConcernArgs rcArgs) {
@@ -403,7 +454,6 @@ public:
 
         void acquireLocksAndIterateCursor(OperationContext* opCtx,
                                           rpc::ReplyBuilderInterface* reply,
-                                          CursorManager* cursorManager,
                                           ClientCursorPin& cursorPin,
                                           CurOp* curOp) {
             // Cursors come in one of two flavors:
@@ -427,7 +477,6 @@ public:
             boost::optional<AutoGetCollectionForReadMaybeLockFree> readLock;
             boost::optional<AutoStatsTracker> statsTracker;
             NamespaceString nss(_cmd.getDbName(), _cmd.getCollection());
-            int64_t cursorId = _cmd.getCommandParameter();
 
             const bool disableAwaitDataFailpointActive =
                 MONGO_unlikely(disableAwaitDataForGetMoreCmd.shouldFail());
@@ -436,6 +485,9 @@ public:
             setUpOperationContextStateForGetMore(
                 opCtx, *cursorPin.getCursor(), _cmd, disableAwaitDataFailpointActive);
 
+            // On early return, typically due to a failed assertion, delete the cursor.
+            ScopeGuard cursorDeleter([&] { cursorPin.deleteUnderlying(); });
+
             if (cursorPin->getExecutor()->lockPolicy() ==
                 PlanExecutor::LockPolicy::kLocksInternally) {
                 if (!nss.isCollectionlessCursorNamespace()) {
@@ -481,49 +533,6 @@ public:
                     opCtx, nss, true));
             }
 
-            // A user can only call getMore on their own cursor. If there were multiple users
-            // authenticated when the cursor was created, then at least one of them must be
-            // authenticated in order to run getMore on the cursor.
-            auto authzSession = AuthorizationSession::get(opCtx->getClient());
-            if (!authzSession->isCoauthorizedWith(cursorPin->getAuthenticatedUsers())) {
-                uasserted(ErrorCodes::Unauthorized,
-                          str::stream() << "cursor id " << cursorId
-                                        << " was not created by the authenticated user");
-            }
-
-            // Ensure that the client still has the privileges to run the originating command.
-            if (!authzSession->isAuthorizedForPrivileges(cursorPin->getOriginatingPrivileges())) {
-                uasserted(ErrorCodes::Unauthorized,
-                          str::stream()
-                              << "not authorized for getMore with cursor id " << cursorId);
-            }
-
-            if (nss != cursorPin->nss()) {
-                uasserted(ErrorCodes::Unauthorized,
-                          str::stream() << "Requested getMore on namespace '" << nss.ns()
-                                        << "', but cursor belongs to a different namespace "
-                                        << cursorPin->nss().ns());
-            }
-
-            if (nss.isOplog() && MONGO_unlikely(rsStopGetMoreCmd.shouldFail())) {
-                uasserted(ErrorCodes::CommandFailed,
-                          str::stream() << "getMore on " << nss.ns()
-                                        << " rejected due to active fail point rsStopGetMoreCmd");
-            }
-
-            // Validation related to awaitData.
-            if (cursorPin->isAwaitData()) {
-                invariant(cursorPin->isTailable());
-            }
-
-            if (_cmd.getMaxTimeMS() && !cursorPin->isAwaitData()) {
-                uasserted(ErrorCodes::BadValue,
-                          "cannot set maxTimeMS on getMore command for a non-awaitData cursor");
-            }
-
-            // On early return, get rid of the cursor.
-            ScopeGuard cursorFreer([&] { cursorPin.deleteUnderlying(); });
-
             // If the 'waitAfterPinningCursorBeforeGetMoreBatch' fail point is enabled, set the
             // 'msg' field of this operation's CurOp to signal that we've hit this point and then
             // repeatedly release and re-acquire the collection readLock at regular intervals until
@@ -683,7 +692,7 @@ public:
             }
 
             if (shouldSaveCursor) {
-                respondWithId = cursorId;
+                respondWithId = cursorPin->cursorid();
 
                 exec->saveState();
                 exec->detachFromOperationContext();
@@ -713,7 +722,7 @@ public:
             curOp->debug().nreturned = numResults;
 
             if (respondWithId) {
-                cursorFreer.dismiss();
+                cursorDeleter.dismiss();
 
                 if (opCtx->isExhaust()) {
                     // Indicate that an exhaust message should be generated and the previous BSONObj
@@ -751,21 +760,23 @@ public:
                 opCtx->lockState()->skipAcquireTicket();
             }
 
-            auto cursorManager = CursorManager::get(opCtx);
-            auto pinCheck = [opCtx, cursorId](const ClientCursor& cc) {
-                // Ensure the lsid and txnNumber of the getMore match that of the
-                // originating command.
+            // Perform validation checks which don't cause the cursor to be deleted on failure.
+            auto pinCheck = [&](const ClientCursor& cc) {
                 validateLSID(opCtx, cursorId, &cc);
                 validateTxnNumber(opCtx, cursorId, &cc);
+                validateAuthorization(opCtx, cc);
+                validateNamespace(nss, cc);
+                validateMaxTimeMS(_cmd.getMaxTimeMS(), cc);
             };
 
-            auto cursorPin = uassertStatusOK(cursorManager->pinCursor(opCtx, cursorId, pinCheck));
+            auto cursorPin =
+                uassertStatusOK(CursorManager::get(opCtx)->pinCursor(opCtx, cursorId, pinCheck));
 
             // Get the read concern level here in case the cursor is exhausted while iterating.
             const auto isLinearizableReadConcern = cursorPin->getReadConcernArgs().getLevel() ==
                 repl::ReadConcernLevel::kLinearizableReadConcern;
 
-            acquireLocksAndIterateCursor(opCtx, reply, cursorManager, cursorPin, curOp);
+            acquireLocksAndIterateCursor(opCtx, reply, cursorPin, curOp);
 
             if (MONGO_unlikely(getMoreHangAfterPinCursor.shouldFail())) {
                 LOGV2(20477,
diff --git a/src/mongo/s/query/async_results_merger.cpp b/src/mongo/s/query/async_results_merger.cpp
index 56f2deff96e..363e151fbfd 100644
--- a/src/mongo/s/query/async_results_merger.cpp
+++ b/src/mongo/s/query/async_results_merger.cpp
@@ -852,6 +852,11 @@ void AsyncResultsMerger::_scheduleKillCursors(WithLock, OperationContext* opCtx)
 
             executor::RemoteCommandRequest request(
                 remote.getTargetHost(), _params.getNss().db().toString(), cmdObj, opCtx);
+            // The 'RemoteCommandRequest' takes the remaining time from the 'opCtx' parameter. If
+            // the cursor was killed due to a maxTimeMs timeout, the remaining time will be 0, and
+            // the remote request will not be sent. To avoid this, we remove the timeout for the
+            // remote 'killCursor' command.
+            request.timeout = executor::RemoteCommandRequestBase::kNoTimeout;
 
             // Send kill request; discard callback handle, if any, or failure report, if not.
             _executor->scheduleRemoteCommand(request, [](auto const&) {}).getStatus().ignore();
diff --git a/src/mongo/s/query/async_results_merger_test.cpp b/src/mongo/s/query/async_results_merger_test.cpp
index 6caad31478e..d089ac691c9 100644
--- a/src/mongo/s/query/async_results_merger_test.cpp
+++ b/src/mongo/s/query/async_results_merger_test.cpp
@@ -961,6 +961,19 @@ TEST_F(AsyncResultsMergerTest, KillCalledTwice) {
     killFuture2.wait();
 }
 
+TEST_F(AsyncResultsMergerTest, KillCursorCmdHasNoTimeout) {
+    std::vector<RemoteCursor> cursors;
+    cursors.push_back(
+        makeRemoteCursor(kTestShardIds[0], kTestShardHosts[0], CursorResponse(kTestNss, 1, {})));
+    auto arm = makeARMFromExistingCursors(std::move(cursors));
+
+    auto* opCtx = operationContext();
+    opCtx->setDeadlineAfterNowBy(Microseconds::zero(), ErrorCodes::MaxTimeMSExpired);
+    auto killFuture = arm->kill(opCtx);
+    ASSERT_EQ(executor::RemoteCommandRequestBase::kNoTimeout, getNthPendingRequest(0u).timeout);
+    killFuture.wait();
+}
+
 TEST_F(AsyncResultsMergerTest, TailableBasic) {
     BSONObj findCmd = fromjson("{find: 'testcoll', tailable: true}");
     std::vector<RemoteCursor> cursors;
author	Romans Kasperovics <romans.kasperovics@mongodb.com>	2022-03-15 08:55:37 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-03-15 09:33:32 +0000
commit	80421c5b8e5ac71e16dc005fd961901884891c47 (patch)
tree	f10f3369dd60df12bea2c4e7060793061d4c7486
parent	abe5428751586d14241f94f06261c7037690557f (diff)
download	mongo-80421c5b8e5ac71e16dc005fd961901884891c47.tar.gz