author     David Storch <david.storch@mongodb.com>  2020-07-30 14:24:45 -0400
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2020-08-25 16:27:30 +0000
commit     bfecd6ba7c6bd490c19a7bc6bd0e97b577711a6b (patch)
tree       3fae38476c2677600641a7edf5bb44b6e8dc1150
parent     d8c7c2c514fb8a44b7b2a731105582b66ed7b253 (diff)
download   mongo-bfecd6ba7c6bd490c19a7bc6bd0e97b577711a6b.tar.gz
SERVER-40317 Fail query when $facet intermediate output exceeds 100MB
Co-authored-by: Justin Seyster <justin.seyster@mongodb.com>
Co-authored-by: Jacob Evans <jacob.evans@10gen.com>
(cherry picked from commit 1e62dde76309c37f8aae937f8782431177b90477)
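In user-visible terms, a $facet whose accumulated output exceeds the new limit now fails fast with error code 4031700 instead of exhausting memory. A minimal sketch of the new failure mode, using a hypothetical 'events' collection holding well over 100MB of documents:

    // Hypothetical repro; collection name and contents are illustrative only.
    db.events.aggregate([{$facet: {all: [{$match: {}}]}}]);
    // Fails with code 4031700: "document constructed by $facet is <N> bytes,
    // which exceeds the limit of 104857600 bytes"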
-rw-r--r--  buildscripts/resmokeconfig/suites/aggregation_facet_unwind_passthrough.yml |  4
-rw-r--r--  jstests/aggregation/bugs/server5932.js                                      |  6
-rw-r--r--  jstests/aggregation/sources/facet/facet_memory_consumption.js               | 83
-rw-r--r--  src/mongo/db/pipeline/document_source_facet.cpp                             | 31
-rw-r--r--  src/mongo/db/pipeline/document_source_facet.h                               | 10
-rw-r--r--  src/mongo/db/query/query_knobs.cpp                                          |  9
-rw-r--r--  src/mongo/db/query/query_knobs.h                                            |  3
7 files changed, 138 insertions(+), 8 deletions(-)
diff --git a/buildscripts/resmokeconfig/suites/aggregation_facet_unwind_passthrough.yml b/buildscripts/resmokeconfig/suites/aggregation_facet_unwind_passthrough.yml
index 15792088726..917b43cf980 100644
--- a/buildscripts/resmokeconfig/suites/aggregation_facet_unwind_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/aggregation_facet_unwind_passthrough.yml
@@ -26,3 +26,7 @@ executor:
mongod_options:
set_parameters:
enableTestCommands: 1
+ # This passthrough wraps entire query result sets in $facet. Allow a larger-than-normal
+ # intermediate document size of 500MB in order to accommodate tests that have a large result
+ # set.
+ internalQueryFacetMaxOutputDocSizeBytes: 500000000
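Because internalQueryFacetMaxOutputDocSizeBytes is exported as a server parameter, it could presumably also be raised at runtime rather than through mongod_options; a sketch, assuming a test environment where changing query knobs is acceptable:

    // Matches the 500MB override used by this passthrough suite.
    db.adminCommand({setParameter: 1, internalQueryFacetMaxOutputDocSizeBytes: 500000000});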
diff --git a/jstests/aggregation/bugs/server5932.js b/jstests/aggregation/bugs/server5932.js
index 1c2752297c5..970892e71b2 100644
--- a/jstests/aggregation/bugs/server5932.js
+++ b/jstests/aggregation/bugs/server5932.js
@@ -1,4 +1,10 @@
// server-5932 Cursor-based aggregation
+//
+// @tags: [
+// # The result set produced by this test is large, so when wrapped in a $facet, the maximum
+// # intermediate document size would be exceeded.
+// do_not_wrap_aggregations_in_facets,
+// ]
var t = db.server5932;
t.drop();
diff --git a/jstests/aggregation/sources/facet/facet_memory_consumption.js b/jstests/aggregation/sources/facet/facet_memory_consumption.js
new file mode 100644
index 00000000000..9407c5abac2
--- /dev/null
+++ b/jstests/aggregation/sources/facet/facet_memory_consumption.js
@@ -0,0 +1,83 @@
+/**
+ * Test that the $facet stage fails cleanly without consuming too much memory if the size of the
+ * facet's output document is large.
+ *
+ * This test was designed to reproduce SERVER-40317.
+ *
+ * Collections must be unsharded, since this test uses $lookup and sharded $lookup is not yet
+ * supported.
+ * @tags: [assumes_unsharded_collection]
+ */
+(function() {
+ "use strict";
+
+ const collName = "facet_memory_consumption";
+ const coll = db[collName];
+ const kFacetOutputTooLargeCode = 4031700;
+ const kBsonObjTooLargeCode = 10334;
+ coll.drop();
+
+ // A document that is slightly less than 1MB.
+ const doc = {str: "x".repeat(1024 * 1024 - 100)};
+
+ // Insert it into the collection twice.
+ assert.writeOK(coll.insert(doc));
+ assert.writeOK(coll.insert(doc));
+
+ // Creates a pipeline that chains Cartesian product stages to return 2^exponent
+ // documents (assuming that there are 2 documents in the 'collName' collection).
+ function cartesianProductPipeline(exponent) {
+ let productPipeline = [];
+ for (let i = 0; i < exponent - 1; ++i) {
+ productPipeline = productPipeline.concat([
+ {$lookup: {from: collName, pipeline: [{$match: {}}], as: "join"}},
+ {$unwind: "$join"},
+ {$project: {str: 1}},
+ ]);
+ }
+ return productPipeline;
+ }
+
+ (function succeedsWhenWithinMemoryLimit() {
+ // This pipeline uses $facet to return one document that is just slightly less than
+ // 16MB, which is within the BSON document size limit.
+ const result = coll.aggregate([{$facet: {product: cartesianProductPipeline(4)}}]).toArray();
+ const resultSize = Object.bsonsize(result);
+
+ // As a sanity check, make sure that the resulting document is somewhere around 16MB in
+ // size.
+ assert.gt(resultSize, 15 * 1024 * 1024, result);
+ assert.lt(resultSize, 16 * 1024 * 1024, result);
+ }());
+
+ (function failsWhenResultDocumentExceedsMaxBSONSize() {
+ // This pipeline uses $facet to create a document that is larger than the 16MB max document
+ // size.
+ const result = assert.throws(
+ () => coll.aggregate([{$facet: {product: cartesianProductPipeline(6)}}]).toArray());
+ assert.eq(result.code, kBsonObjTooLargeCode);
+ }());
+
+ (function succeedsWhenIntermediateDocumentExceedsMaxBSONSizeWithUnwind() {
+ // This pipeline uses $facet to create an intermediate document that is larger than the 16MB
+ // max document size but smaller than the 100MB allowed for an intermediate document. The
+ // $unwind stage breaks the large document into a bunch of small documents, which is legal.
+ const result =
+ coll.aggregate(
+ [{$facet: {product: cartesianProductPipeline(6)}}, {$unwind: "$product"}])
+ .toArray();
+ assert.eq(64, result.length, result);
+ }());
+
+ (function failsWhenFacetOutputDocumentTooLarge() {
+ // This pipeline uses $facet to create a document that is larger than the 100MB maximum
+ // size for an intermediate document. Even with the $unwind stage, the pipeline should
+ // fail, this time with error code 4031700.
+ const result = assert.throws(
+ () =>
+ coll.aggregate(
+ [{$facet: {product: cartesianProductPipeline(10)}}, {$unwind: "$product"}])
+ .toArray());
+ assert.eq(result.code, kFacetOutputTooLargeCode);
+ }());
+}());
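As a rough sketch of why the test picks exponents 4, 6, and 10: each source document is about 1MB and cartesianProductPipeline(exponent) yields 2^exponent documents, so the facet output sizes land on either side of the two limits. (The helper below is illustrative only, not part of the patch.)

    // Approximate $facet output size, in bytes, for cartesianProductPipeline(exponent).
    const approxFacetBytes = (exponent) => Math.pow(2, exponent) * 1024 * 1024;
    approxFacetBytes(4);   // ~16MB: barely under the 16MB BSON document limit
    approxFacetBytes(6);   // ~64MB: over 16MB, but under the 100MB intermediate limit
    approxFacetBytes(10);  // ~1GB: over the 100MB intermediate limit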
diff --git a/src/mongo/db/pipeline/document_source_facet.cpp b/src/mongo/db/pipeline/document_source_facet.cpp
index 1f89b1f276d..11265d30038 100644
--- a/src/mongo/db/pipeline/document_source_facet.cpp
+++ b/src/mongo/db/pipeline/document_source_facet.cpp
@@ -58,10 +58,13 @@ using std::string;
using std::vector;
DocumentSourceFacet::DocumentSourceFacet(std::vector<FacetPipeline> facetPipelines,
- const intrusive_ptr<ExpressionContext>& expCtx)
+ const intrusive_ptr<ExpressionContext>& expCtx,
+ size_t bufferSizeBytes,
+ size_t maxOutputDocBytes)
: DocumentSourceNeedsMongoProcessInterface(expCtx),
- _teeBuffer(TeeBuffer::create(facetPipelines.size())),
- _facets(std::move(facetPipelines)) {
+ _teeBuffer(TeeBuffer::create(facetPipelines.size(), bufferSizeBytes)),
+ _facets(std::move(facetPipelines)),
+ _maxOutputDocSizeBytes(maxOutputDocBytes) {
for (size_t facetId = 0; facetId < _facets.size(); ++facetId) {
auto& facet = _facets[facetId];
facet.pipeline->addInitialSource(
@@ -148,8 +151,12 @@ REGISTER_DOCUMENT_SOURCE(facet,
DocumentSourceFacet::createFromBson);
intrusive_ptr<DocumentSourceFacet> DocumentSourceFacet::create(
- std::vector<FacetPipeline> facetPipelines, const intrusive_ptr<ExpressionContext>& expCtx) {
- return new DocumentSourceFacet(std::move(facetPipelines), expCtx);
+ std::vector<FacetPipeline> facetPipelines,
+ const intrusive_ptr<ExpressionContext>& expCtx,
+ size_t bufferSizeBytes,
+ size_t maxOutputDocBytes) {
+ return new DocumentSourceFacet(
+ std::move(facetPipelines), expCtx, bufferSizeBytes, maxOutputDocBytes);
}
void DocumentSourceFacet::setSource(DocumentSource* source) {
@@ -170,6 +177,17 @@ DocumentSource::GetNextResult DocumentSourceFacet::getNext() {
return GetNextResult::makeEOF();
}
+ const size_t maxBytes = _maxOutputDocSizeBytes;
+ auto ensureUnderMemoryLimit = [ usedBytes = 0ul, &maxBytes ](long long additional) mutable {
+ usedBytes += additional;
+ uassert(4031700,
+ str::stream() << "document constructed by $facet is " << usedBytes
+ << " bytes, which exceeds the limit of "
+ << maxBytes
+ << " bytes",
+ usedBytes <= maxBytes);
+ };
+
vector<vector<Value>> results(_facets.size());
bool allPipelinesEOF = false;
while (!allPipelinesEOF) {
@@ -178,6 +196,7 @@ DocumentSource::GetNextResult DocumentSourceFacet::getNext() {
const auto& pipeline = _facets[facetId].pipeline;
auto next = pipeline->getSources().back()->getNext();
for (; next.isAdvanced(); next = pipeline->getSources().back()->getNext()) {
+ ensureUnderMemoryLimit(next.getDocument().getApproximateSize());
results[facetId].emplace_back(next.releaseDocument());
}
allPipelinesEOF = allPipelinesEOF && next.isEOF();
@@ -303,6 +322,6 @@ intrusive_ptr<DocumentSource> DocumentSourceFacet::createFromBson(
facetPipelines.emplace_back(facetName, std::move(pipeline));
}
- return new DocumentSourceFacet(std::move(facetPipelines), expCtx);
+ return DocumentSourceFacet::create(std::move(facetPipelines), expCtx);
}
} // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_facet.h b/src/mongo/db/pipeline/document_source_facet.h
index c534db24108..b196a7265fc 100644
--- a/src/mongo/db/pipeline/document_source_facet.h
+++ b/src/mongo/db/pipeline/document_source_facet.h
@@ -93,7 +93,9 @@ public:
static boost::intrusive_ptr<DocumentSourceFacet> create(
std::vector<FacetPipeline> facetPipelines,
- const boost::intrusive_ptr<ExpressionContext>& expCtx);
+ const boost::intrusive_ptr<ExpressionContext>& expCtx,
+ size_t bufferSizeBytes = internalQueryFacetBufferSizeBytes.load(),
+ size_t maxOutputDocBytes = internalQueryFacetMaxOutputDocSizeBytes.load());
/**
* Blocking call. Will consume all input and produces one output document.
@@ -148,13 +150,17 @@ protected:
private:
DocumentSourceFacet(std::vector<FacetPipeline> facetPipelines,
- const boost::intrusive_ptr<ExpressionContext>& expCtx);
+ const boost::intrusive_ptr<ExpressionContext>& expCtx,
+ size_t bufferSizeBytes,
+ size_t maxOutputDocBytes);
Value serialize(boost::optional<ExplainOptions::Verbosity> explain = boost::none) const final;
boost::intrusive_ptr<TeeBuffer> _teeBuffer;
std::vector<FacetPipeline> _facets;
+ const size_t _maxOutputDocSizeBytes;
+
bool _done = false;
};
} // namespace mongo
diff --git a/src/mongo/db/query/query_knobs.cpp b/src/mongo/db/query/query_knobs.cpp
index bdde620c87f..9e4a9aa3931 100644
--- a/src/mongo/db/query/query_knobs.cpp
+++ b/src/mongo/db/query/query_knobs.cpp
@@ -72,6 +72,15 @@ MONGO_EXPORT_SERVER_PARAMETER(internalQueryExecYieldPeriodMS, int, 10);
MONGO_EXPORT_SERVER_PARAMETER(internalQueryFacetBufferSizeBytes, int, 100 * 1024 * 1024);
+MONGO_EXPORT_SERVER_PARAMETER(internalQueryFacetMaxOutputDocSizeBytes, long long, 100 * 1024 * 1024)
+ ->withValidator([](const long long& newVal) {
+ if (newVal <= 0) {
+ return Status(ErrorCodes::BadValue,
+ "internalQueryFacetMaxOutputDocSizeBytes must be positive");
+ }
+ return Status::OK();
+ });
+
MONGO_EXPORT_SERVER_PARAMETER(internalLookupStageIntermediateDocumentMaxSizeBytes,
long long,
100 * 1024 * 1024)
diff --git a/src/mongo/db/query/query_knobs.h b/src/mongo/db/query/query_knobs.h
index 035915267e8..ed4304de4ae 100644
--- a/src/mongo/db/query/query_knobs.h
+++ b/src/mongo/db/query/query_knobs.h
@@ -118,6 +118,9 @@ const int64_t insertVectorMaxBytes = 256 * 1024;
// The number of bytes to buffer at once during a $facet stage.
extern AtomicInt32 internalQueryFacetBufferSizeBytes;
+// The maximum size in bytes of the $facet stage's output document.
+extern AtomicInt64 internalQueryFacetMaxOutputDocSizeBytes;
+
extern AtomicInt64 internalLookupStageIntermediateDocumentMaxSizeBytes;
extern AtomicInt32 internalInsertMaxBatchSize;