diff options
author | David Storch <david.storch@mongodb.com> | 2020-07-30 14:24:45 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-08-25 16:27:30 +0000 |
commit | bfecd6ba7c6bd490c19a7bc6bd0e97b577711a6b (patch) | |
tree | 3fae38476c2677600641a7edf5bb44b6e8dc1150 | |
parent | d8c7c2c514fb8a44b7b2a731105582b66ed7b253 (diff) | |
download | mongo-bfecd6ba7c6bd490c19a7bc6bd0e97b577711a6b.tar.gz |
SERVER-40317 Fail query when $facet intermediate output exceeds 100MB
Co-authored-by: Justin Seyster <justin.seyster@mongodb.com>
Co-authored-by: Jacob Evans <jacob.evans@10gen.com>
(cherry picked from commit 1e62dde76309c37f8aae937f8782431177b90477)
-rw-r--r-- | buildscripts/resmokeconfig/suites/aggregation_facet_unwind_passthrough.yml | 4 | ||||
-rw-r--r-- | jstests/aggregation/bugs/server5932.js | 6 | ||||
-rw-r--r-- | jstests/aggregation/sources/facet/facet_memory_consumption.js | 83 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_facet.cpp | 31 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_facet.h | 10 | ||||
-rw-r--r-- | src/mongo/db/query/query_knobs.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/query/query_knobs.h | 3 |
7 files changed, 138 insertions, 8 deletions
diff --git a/buildscripts/resmokeconfig/suites/aggregation_facet_unwind_passthrough.yml b/buildscripts/resmokeconfig/suites/aggregation_facet_unwind_passthrough.yml index 15792088726..917b43cf980 100644 --- a/buildscripts/resmokeconfig/suites/aggregation_facet_unwind_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/aggregation_facet_unwind_passthrough.yml @@ -26,3 +26,7 @@ executor: mongod_options: set_parameters: enableTestCommands: 1 + # This passthrough wraps entire query result sets in $facet. Allow a larger-than-normal + # intermediate document size of 500MB in order to accommodate tests that have a large result + # set. + internalQueryFacetMaxOutputDocSizeBytes: 500000000 diff --git a/jstests/aggregation/bugs/server5932.js b/jstests/aggregation/bugs/server5932.js index 1c2752297c5..970892e71b2 100644 --- a/jstests/aggregation/bugs/server5932.js +++ b/jstests/aggregation/bugs/server5932.js @@ -1,4 +1,10 @@ // server-5932 Cursor-based aggregation +// +// @tags: [ +// # The result set produced by this test is large, so when wrapped in a $facet, the maximum +// # intermediate document size would be exceeded. +// do_not_wrap_aggregations_in_facets, +// ] var t = db.server5932; t.drop(); diff --git a/jstests/aggregation/sources/facet/facet_memory_consumption.js b/jstests/aggregation/sources/facet/facet_memory_consumption.js new file mode 100644 index 00000000000..9407c5abac2 --- /dev/null +++ b/jstests/aggregation/sources/facet/facet_memory_consumption.js @@ -0,0 +1,83 @@ +/** + * Test that the $facet stage fails cleanly without consuming too much memory if the size of the + * facet's output document is large. + * + * This test was designed to reproduce SERVER-40317. + * + * Collections must be unsharded, since this test uses $lookup and sharded $lookup is not yet + * supported. 
+ * @tags: [assumes_unsharded_collection] + */ +(function() { + "use strict"; + + const collName = "facet_memory_consumption"; + const coll = db[collName]; + const kFacetOutputTooLargeCode = 4031700; + const kBsonObjTooLargeCode = 10334; + coll.drop(); + + // A document that is slightly less than 1MB. + const doc = {str: "x".repeat(1024 * 1024 - 100)}; + + // Insert it into the collection twice. + assert.writeOK(coll.insert(doc)); + assert.writeOK(coll.insert(doc)); + + // Creates a pipeline that chains Cartesian product pipelines to create a pipeline returning + // 2^exponent documents (assuming that there are 2 documents in the 'collName' collection). + function cartesianProductPipeline(exponent) { + let productPipeline = []; + for (let i = 0; i < exponent - 1; ++i) { + productPipeline = productPipeline.concat([ + {$lookup: {from: collName, pipeline: [{$match: {}}], as: "join"}}, + {$unwind: "$join"}, + {$project: {str: 1}}, + ]); + } + return productPipeline; + } + + (function succeedsWhenWithinMemoryLimit() { + // This pipeline uses $facet to return one document that is just slightly less than the + // 16MB, which is within the document size limit. + const result = coll.aggregate([{$facet: {product: cartesianProductPipeline(4)}}]).toArray(); + const resultSize = Object.bsonsize(result); + + // As a sanity check, make sure that the resulting document is somewhere around 16MB in + // size. + assert.gt(resultSize, 15 * 1024 * 1024, result); + assert.lt(resultSize, 16 * 1024 * 1024, result); + }()); + + (function failsWhenResultDocumentExceedsMaxBSONSize() { + // This pipeline uses $facet to create a document that is larger than the 16MB max document + // size. 
+ const result = assert.throws( + () => coll.aggregate([{$facet: {product: cartesianProductPipeline(6)}}]).toArray()); + assert.eq(result.code, kBsonObjTooLargeCode); + }()); + + (function succeedsWhenIntermediateDocumentExceedsMaxBSONSizeWithUnwind() { + // This pipeline uses $facet to create an intermediate document that is larger than the 16MB + // max document size but smaller than the 100MB allowed for an intermediate document. The + // $unwind stage breaks the large document into a bunch of small documents, which is legal. + const result = + coll.aggregate( + [{$facet: {product: cartesianProductPipeline(6)}}, {$unwind: "$product"}]) + .toArray(); + assert.eq(64, result.length, result); + }()); + + (function failsWhenFacetOutputDocumentTooLarge() { + // This pipeline uses $facet to create a document that is larger than the 100MB maximum size + // for an intermediate document. Even with the $unwind stage, the pipeline should fail, this + // time with error code 4031700. + const result = assert.throws( + () => + coll.aggregate( + [{$facet: {product: cartesianProductPipeline(10)}}, {$unwind: "$product"}]) + .toArray()); + assert.eq(result.code, kFacetOutputTooLargeCode); + }()); +}()); diff --git a/src/mongo/db/pipeline/document_source_facet.cpp b/src/mongo/db/pipeline/document_source_facet.cpp index 1f89b1f276d..11265d30038 100644 --- a/src/mongo/db/pipeline/document_source_facet.cpp +++ b/src/mongo/db/pipeline/document_source_facet.cpp @@ -58,10 +58,13 @@ using std::string; using std::vector; DocumentSourceFacet::DocumentSourceFacet(std::vector<FacetPipeline> facetPipelines, - const intrusive_ptr<ExpressionContext>& expCtx) + const intrusive_ptr<ExpressionContext>& expCtx, + size_t bufferSizeBytes, + size_t maxOutputDocBytes) : DocumentSourceNeedsMongoProcessInterface(expCtx), - _teeBuffer(TeeBuffer::create(facetPipelines.size())), - _facets(std::move(facetPipelines)) { + _teeBuffer(TeeBuffer::create(facetPipelines.size(), bufferSizeBytes)), + 
_facets(std::move(facetPipelines)), + _maxOutputDocSizeBytes(maxOutputDocBytes) { for (size_t facetId = 0; facetId < _facets.size(); ++facetId) { auto& facet = _facets[facetId]; facet.pipeline->addInitialSource( @@ -148,8 +151,12 @@ REGISTER_DOCUMENT_SOURCE(facet, DocumentSourceFacet::createFromBson); intrusive_ptr<DocumentSourceFacet> DocumentSourceFacet::create( - std::vector<FacetPipeline> facetPipelines, const intrusive_ptr<ExpressionContext>& expCtx) { - return new DocumentSourceFacet(std::move(facetPipelines), expCtx); + std::vector<FacetPipeline> facetPipelines, + const intrusive_ptr<ExpressionContext>& expCtx, + size_t bufferSizeBytes, + size_t maxOutputDocBytes) { + return new DocumentSourceFacet( + std::move(facetPipelines), expCtx, bufferSizeBytes, maxOutputDocBytes); } void DocumentSourceFacet::setSource(DocumentSource* source) { @@ -170,6 +177,17 @@ DocumentSource::GetNextResult DocumentSourceFacet::getNext() { return GetNextResult::makeEOF(); } + const size_t maxBytes = _maxOutputDocSizeBytes; + auto ensureUnderMemoryLimit = [ usedBytes = 0ul, &maxBytes ](long long additional) mutable { + usedBytes += additional; + uassert(4031700, + str::stream() << "document constructed by $facet is " << usedBytes + << " bytes, which exceeds the limit of " + << maxBytes + << " bytes", + usedBytes <= maxBytes); + }; + vector<vector<Value>> results(_facets.size()); bool allPipelinesEOF = false; while (!allPipelinesEOF) { @@ -178,6 +196,7 @@ DocumentSource::GetNextResult DocumentSourceFacet::getNext() { const auto& pipeline = _facets[facetId].pipeline; auto next = pipeline->getSources().back()->getNext(); for (; next.isAdvanced(); next = pipeline->getSources().back()->getNext()) { + ensureUnderMemoryLimit(next.getDocument().getApproximateSize()); results[facetId].emplace_back(next.releaseDocument()); } allPipelinesEOF = allPipelinesEOF && next.isEOF(); @@ -303,6 +322,6 @@ intrusive_ptr<DocumentSource> DocumentSourceFacet::createFromBson( 
facetPipelines.emplace_back(facetName, std::move(pipeline)); } - return new DocumentSourceFacet(std::move(facetPipelines), expCtx); + return DocumentSourceFacet::create(std::move(facetPipelines), expCtx); } } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_facet.h b/src/mongo/db/pipeline/document_source_facet.h index c534db24108..b196a7265fc 100644 --- a/src/mongo/db/pipeline/document_source_facet.h +++ b/src/mongo/db/pipeline/document_source_facet.h @@ -93,7 +93,9 @@ public: static boost::intrusive_ptr<DocumentSourceFacet> create( std::vector<FacetPipeline> facetPipelines, - const boost::intrusive_ptr<ExpressionContext>& expCtx); + const boost::intrusive_ptr<ExpressionContext>& expCtx, + size_t bufferSizeBytes = internalQueryFacetBufferSizeBytes.load(), + size_t maxOutputDocBytes = internalQueryFacetMaxOutputDocSizeBytes.load()); /** * Blocking call. Will consume all input and produces one output document. @@ -148,13 +150,17 @@ protected: private: DocumentSourceFacet(std::vector<FacetPipeline> facetPipelines, - const boost::intrusive_ptr<ExpressionContext>& expCtx); + const boost::intrusive_ptr<ExpressionContext>& expCtx, + size_t bufferSizeBytes, + size_t maxOutputDocBytes); Value serialize(boost::optional<ExplainOptions::Verbosity> explain = boost::none) const final; boost::intrusive_ptr<TeeBuffer> _teeBuffer; std::vector<FacetPipeline> _facets; + const size_t _maxOutputDocSizeBytes; + bool _done = false; }; } // namespace mongo diff --git a/src/mongo/db/query/query_knobs.cpp b/src/mongo/db/query/query_knobs.cpp index bdde620c87f..9e4a9aa3931 100644 --- a/src/mongo/db/query/query_knobs.cpp +++ b/src/mongo/db/query/query_knobs.cpp @@ -72,6 +72,15 @@ MONGO_EXPORT_SERVER_PARAMETER(internalQueryExecYieldPeriodMS, int, 10); MONGO_EXPORT_SERVER_PARAMETER(internalQueryFacetBufferSizeBytes, int, 100 * 1024 * 1024); +MONGO_EXPORT_SERVER_PARAMETER(internalQueryFacetMaxOutputDocSizeBytes, long long, 100 * 1024 * 1024) + ->withValidator([](const long 
long& newVal) { + if (newVal <= 0) { + return Status(ErrorCodes::BadValue, + "internalQueryFacetMaxOutputDocSizeBytes must be positive"); + } + return Status::OK(); + }); + MONGO_EXPORT_SERVER_PARAMETER(internalLookupStageIntermediateDocumentMaxSizeBytes, long long, 100 * 1024 * 1024) diff --git a/src/mongo/db/query/query_knobs.h b/src/mongo/db/query/query_knobs.h index 035915267e8..ed4304de4ae 100644 --- a/src/mongo/db/query/query_knobs.h +++ b/src/mongo/db/query/query_knobs.h @@ -118,6 +118,9 @@ const int64_t insertVectorMaxBytes = 256 * 1024; // The number of bytes to buffer at once during a $facet stage. extern AtomicInt32 internalQueryFacetBufferSizeBytes; +// The maximum size in bytes of the $facet stage's output document. +extern AtomicInt64 internalQueryFacetMaxOutputDocSizeBytes; + extern AtomicInt64 internalLookupStageIntermediateDocumentMaxSizeBytes; extern AtomicInt32 internalInsertMaxBatchSize; |