diff options
author | Mindaugas Malinauskas <mindaugas.malinauskas@mongodb.com> | 2023-03-29 16:51:28 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-12 14:22:25 +0000 |
commit | be81ddbba83a2a5220376a35aa7f2ad070b78300 (patch) | |
tree | 3f5ad8a43729e2fc5de7900010af116d1a86581b | |
parent | 4936d07dc233aff1529456527a1efc950cd85d61 (diff) | |
download | mongo-be81ddbba83a2a5220376a35aa7f2ad070b78300.tar.gz |
SERVER-75261 Added accounting for array element overhead for "listCollections", "listIndexes", "_shardsvrCheckMetadataConsistencyParticipant" commands
(cherry picked from commit 3cde7fd5d90f1e6bd16d38cc668963a14671d690)
(cherry picked from commit 9cd5eeeba2184ce807985438ee984d7d8485b224)
(cherry picked from commit ba334a1cb5f1a209e4f811a24ef1a6a65a1c9d0c)
-rw-r--r-- | jstests/noPassthrough/list_collections_large_number.js | 34 | ||||
-rw-r--r-- | src/mongo/db/commands/list_collections.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/commands/list_indexes.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/query/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/query/find_common.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/query/find_common.h | 28 | ||||
-rw-r--r-- | src/mongo/db/query/find_common_test.cpp | 74 | ||||
-rw-r--r-- | src/mongo/s/query/cluster_find.cpp | 25 |
8 files changed, 164 insertions, 24 deletions
diff --git a/jstests/noPassthrough/list_collections_large_number.js b/jstests/noPassthrough/list_collections_large_number.js new file mode 100644 index 00000000000..379d4ea5dd7 --- /dev/null +++ b/jstests/noPassthrough/list_collections_large_number.js @@ -0,0 +1,34 @@ +/** + * Tests that "listCollections" command successfully returns results when the database has a very + * large number of collections. + * @tags: [ + * resource_intensive, + * ] + */ +(function() { +"use strict"; + +const conn = MongoRunner.runMongod({}); +assert.neq(null, conn, "mongod was unable to start up"); +const db = conn.getDB(jsTestName()); + +const validatorObj = { + $jsonSchema: { + bsonType: "object", + properties: { + s: {bsonType: "string", description: "x".repeat(4801)}, + + } + } +}; +const nCollections = 3300; +jsTestLog(`Creating ${nCollections} collections....`); +for (let i = 0; i < nCollections; i++) { + assert.commandWorked(db.createCollection("c_" + i.toPrecision(6), {validator: validatorObj})); +} +jsTestLog(`Done creating ${nCollections} collections`); +assert.commandWorked(db.runCommand({"listCollections": 1})); + +// Do not validate collections since that is an expensive action. +MongoRunner.stopMongod(conn, undefined, {skipValidation: true}); +})();
\ No newline at end of file diff --git a/src/mongo/db/commands/list_collections.cpp b/src/mongo/db/commands/list_collections.cpp index adb891fce3d..75231504d09 100644 --- a/src/mongo/db/commands/list_collections.cpp +++ b/src/mongo/db/commands/list_collections.cpp @@ -498,7 +498,7 @@ public: batchSize = *listCollRequest.getCursor()->getBatchSize(); } - int bytesBuffered = 0; + FindCommon::BSONArrayResponseSizeTracker responseSizeTracker; for (long long objCount = 0; objCount < batchSize; objCount++) { BSONObj nextDoc; PlanExecutor::ExecState state = exec->getNext(&nextDoc, nullptr); @@ -509,7 +509,7 @@ public: // If we can't fit this result inside the current batch, then we stash it for // later. - if (!FindCommon::haveSpaceForNext(nextDoc, objCount, bytesBuffered)) { + if (!responseSizeTracker.haveSpaceForNext(nextDoc)) { exec->enqueue(nextDoc); break; } @@ -525,7 +525,7 @@ public: "error"_attr = exc); fassertFailed(5254301); } - bytesBuffered += nextDoc.objsize(); + responseSizeTracker.add(nextDoc); } if (exec->isEOF()) { return createListCollectionsCursorReply( diff --git a/src/mongo/db/commands/list_indexes.cpp b/src/mongo/db/commands/list_indexes.cpp index 1d89f789606..aec504dd96d 100644 --- a/src/mongo/db/commands/list_indexes.cpp +++ b/src/mongo/db/commands/list_indexes.cpp @@ -270,7 +270,7 @@ public: nss)); std::vector<mongo::ListIndexesReplyItem> firstBatch; - int bytesBuffered = 0; + FindCommon::BSONArrayResponseSizeTracker responseSizeTracker; for (long long objCount = 0; objCount < batchSize; objCount++) { BSONObj nextDoc; PlanExecutor::ExecState state = exec->getNext(&nextDoc, nullptr); @@ -282,7 +282,7 @@ public: // If we can't fit this result inside the current batch, then we stash it for // later. 
- if (!FindCommon::haveSpaceForNext(nextDoc, objCount, bytesBuffered)) { + if (!responseSizeTracker.haveSpaceForNext(nextDoc)) { exec->enqueue(nextDoc); break; } @@ -301,7 +301,7 @@ public: nextDoc.toString(), exc.toString())); } - bytesBuffered += nextDoc.objsize(); + responseSizeTracker.add(nextDoc); } if (exec->isEOF()) { diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index 5b66ba2bd67..d0b3b30524f 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -316,6 +316,7 @@ env.CppUnitTest( "classic_stage_builder_test.cpp", "count_command_test.cpp", "cursor_response_test.cpp", + "find_common_test.cpp", "get_executor_test.cpp", "getmore_request_test.cpp", "hint_parser_test.cpp", diff --git a/src/mongo/db/query/find_common.cpp b/src/mongo/db/query/find_common.cpp index 4f34e0b1cf4..d6084bb840f 100644 --- a/src/mongo/db/query/find_common.cpp +++ b/src/mongo/db/query/find_common.cpp @@ -95,4 +95,18 @@ void FindCommon::waitInFindBeforeMakingBatch(OperationContext* opCtx, const Cano std::move(whileWaitingFunc), cq.nss()); } + +bool FindCommon::BSONArrayResponseSizeTracker::haveSpaceForNext(const BSONObj& document) { + return FindCommon::haveSpaceForNext(document, _numberOfDocuments, _bsonArraySizeInBytes); +} +void FindCommon::BSONArrayResponseSizeTracker::add(const BSONObj& document) { + dassert(haveSpaceForNext(document)); + ++_numberOfDocuments; + _bsonArraySizeInBytes += (document.objsize() + kPerDocumentOverheadBytesUpperBound); +} + +// Upper bound of BSON array element overhead. The overhead is 1 byte/doc for the type + 1 byte/doc +// for the field name's null terminator + 1 byte per digit of the maximum array index value. 
+const size_t FindCommon::BSONArrayResponseSizeTracker::kPerDocumentOverheadBytesUpperBound{ + 2 + std::to_string(BSONObjMaxUserSize / BSONObj::kMinBSONLength).length()}; } // namespace mongo diff --git a/src/mongo/db/query/find_common.h b/src/mongo/db/query/find_common.h index 14118cdea03..372bbc78347 100644 --- a/src/mongo/db/query/find_common.h +++ b/src/mongo/db/query/find_common.h @@ -89,7 +89,7 @@ public: // This max may be exceeded by epsilon for output documents that approach the maximum user // document size. That is, if we must return a BSONObjMaxUserSize document, then the total // response size will be BSONObjMaxUserSize plus the amount of size required for the message - // header and the cursor response "envelope". (The envolope contains namespace and cursor id + // header and the cursor response "envelope". (The envelope contains namespace and cursor id + info.) static const int kMaxBytesToReturnToClientAtOnce = BSONObjMaxUserSize; @@ -128,6 +128,32 @@ public: * failpoint is active. */ static void waitInFindBeforeMakingBatch(OperationContext* opCtx, const CanonicalQuery& cq); + + /** + * Tracker of a size of a server response presented as a BSON array. Facilitates limiting the + * server response size to 16MB + certain epsilon. Accounts for array element and its overhead + * size. Does not account for response "envelope" size. + */ + class BSONArrayResponseSizeTracker { + // Upper bound of BSON array element overhead. + static const size_t kPerDocumentOverheadBytesUpperBound; + + public: + /** + * Returns true only if 'document' can be added to the BSON array without violating the + * overall response size limit or if it is the first document. + */ + bool haveSpaceForNext(const BSONObj& document); + + /** + * Records that 'document' was added to the response. 
+ */ + void add(const BSONObj& document); + + private: + std::size_t _numberOfDocuments{0}; + std::size_t _bsonArraySizeInBytes{0}; + }; }; } // namespace mongo diff --git a/src/mongo/db/query/find_common_test.cpp b/src/mongo/db/query/find_common_test.cpp new file mode 100644 index 00000000000..d7dfc10d950 --- /dev/null +++ b/src/mongo/db/query/find_common_test.cpp @@ -0,0 +1,74 @@ +/** + * Copyright (C) 2023-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#include "mongo/platform/basic.h" + +#include <string> + +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/query/find_common.h" + +#include "mongo/unittest/unittest.h" + +namespace { + +using namespace mongo; + +TEST(BSONArrayResponseSizeTrackerTest, AddLargeNumberOfElements) { + BSONObjBuilder bsonObjBuilder; + { + FindCommon::BSONArrayResponseSizeTracker sizeTracker; + BSONArrayBuilder arrayBuilder{bsonObjBuilder.subarrayStart("a")}; + BSONObj emptyObject; + while (sizeTracker.haveSpaceForNext(emptyObject)) { + sizeTracker.add(emptyObject); + arrayBuilder.append(emptyObject); + } + } + // If the BSON object is successfully constructed, then space accounting was correct. + bsonObjBuilder.obj(); +} +TEST(BSONArrayResponseSizeTrackerTest, CanAddAtLeastOneDocument) { + auto largeObject = BSON("a" << std::string(16 * 1024 * 1024, 'A')); + BSONObj emptyObject; + BSONObjBuilder bsonObjBuilder; + { + FindCommon::BSONArrayResponseSizeTracker sizeTracker; + BSONArrayBuilder arrayBuilder{bsonObjBuilder.subarrayStart("a")}; + // Add an object that is larger than 16MB. + ASSERT(sizeTracker.haveSpaceForNext(largeObject)); + sizeTracker.add(largeObject); + arrayBuilder.append(largeObject); + ASSERT(!sizeTracker.haveSpaceForNext(emptyObject)); + } + // If the BSON object is successfully constructed, then space accounting was correct. + bsonObjBuilder.obj(); +} +} // namespace diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp index b0b4b69ad3a..f3de160b959 100644 --- a/src/mongo/s/query/cluster_find.cpp +++ b/src/mongo/s/query/cluster_find.cpp @@ -80,11 +80,6 @@ static const BSONObj kSortKeyMetaProjection = BSON("$meta" << "sortKey"); static const BSONObj kGeoNearDistanceMetaProjection = BSON("$meta" << "geoNearDistance"); -// We must allow some amount of overhead per result document, since when we make a cursor response -// the documents are elements of a BSONArray. 
The overhead is 1 byte/doc for the type + 1 byte/doc -// for the field name's null terminator + 1 byte per digit in the array index. The index can be no -// more than 8 decimal digits since the response is at most 16MB, and 16 * 1024 * 1024 < 1 * 10^8. -static const int kPerDocumentOverheadBytesUpperBound = 10; const char kFindCmdName[] = "find"; @@ -356,7 +351,7 @@ CursorId runQueryWithoutRetrying(OperationContext* opCtx, FindCommon::waitInFindBeforeMakingBatch(opCtx, query); auto cursorState = ClusterCursorManager::CursorState::NotExhausted; - int bytesBuffered = 0; + FindCommon::BSONArrayResponseSizeTracker responseSizeTracker; // This loop will not result in actually calling getMore against shards, but just loading // results from the initial batches (that were obtained while establishing cursors) into @@ -379,14 +374,13 @@ CursorId runQueryWithoutRetrying(OperationContext* opCtx, // If adding this object will cause us to exceed the message size limit, then we stash it // for later. - if (!FindCommon::haveSpaceForNext(nextObj, results->size(), bytesBuffered)) { + if (!responseSizeTracker.haveSpaceForNext(nextObj)) { ccc->queueResult(nextObj); break; } - // Add doc to the batch. Account for the space overhead associated with returning this doc - // inside a BSON array. - bytesBuffered += (nextObj.objsize() + kPerDocumentOverheadBytesUpperBound); + // Add doc to the batch. 
+ responseSizeTracker.add(nextObj); results->push_back(std::move(nextObj)); } @@ -774,7 +768,7 @@ StatusWith<CursorResponse> ClusterFind::runGetMore(OperationContext* opCtx, } std::vector<BSONObj> batch; - int bytesBuffered = 0; + FindCommon::BSONArrayResponseSizeTracker responseSizeTracker; long long batchSize = cmd.getBatchSize().value_or(0); long long startingFrom = pinnedCursor.getValue()->getNumReturnedSoFar(); auto cursorState = ClusterCursorManager::CursorState::NotExhausted; @@ -833,17 +827,14 @@ StatusWith<CursorResponse> ClusterFind::runGetMore(OperationContext* opCtx, break; } - if (!FindCommon::haveSpaceForNext( - *next.getValue().getResult(), batch.size(), bytesBuffered)) { + if (!responseSizeTracker.haveSpaceForNext(*next.getValue().getResult())) { pinnedCursor.getValue()->queueResult(*next.getValue().getResult()); stashedResult = true; break; } - // Add doc to the batch. Account for the space overhead associated with returning this doc - // inside a BSON array. - bytesBuffered += - (next.getValue().getResult()->objsize() + kPerDocumentOverheadBytesUpperBound); + // Add doc to the batch. + responseSizeTracker.add(*next.getValue().getResult()); batch.push_back(std::move(*next.getValue().getResult())); // Update the postBatchResumeToken. For non-$changeStream aggregations, this will be empty. |