From e633ee07371dd276469766698d40f44f930b17ba Mon Sep 17 00:00:00 2001 From: Ted Tuckman Date: Wed, 5 Jan 2022 19:16:23 +0000 Subject: SERVER-60598 Optimize multiple sorts in pipeline with $densify --- jstests/aggregation/extras/utils.js | 20 +- .../sources/densify/densify_sort_optimization.js | 293 +++++++++++++++++++++ src/mongo/db/pipeline/document_source_densify.cpp | 104 ++++++-- src/mongo/db/pipeline/document_source_densify.h | 7 + src/mongo/db/query/sort_pattern.cpp | 14 + src/mongo/db/query/sort_pattern.h | 9 + 6 files changed, 425 insertions(+), 22 deletions(-) create mode 100644 jstests/aggregation/sources/densify/densify_sort_optimization.js diff --git a/jstests/aggregation/extras/utils.js b/jstests/aggregation/extras/utils.js index 7ea8398ebec..626a7a6939b 100644 --- a/jstests/aggregation/extras/utils.js +++ b/jstests/aggregation/extras/utils.js @@ -432,13 +432,23 @@ function collectionExists(coll) { * pipeline from the explain results regardless of cluster topology. */ function desugarSingleStageAggregation(db, coll, stage) { - const result = coll.explain().aggregate([ - // prevent stages from being absorbed into the .find() layer - {$_internalInhibitOptimization: {}}, - stage, - ]); + return getExplainedPipelineFromAggregation(db, coll, [stage]); +} + +/** + * Runs and asserts an explain command for an aggregation with the given pipeline. Returns just the + * pipeline from the explain results regardless of cluster topology. + */ +function getExplainedPipelineFromAggregation(db, coll, pipeline) { + // Prevent stages from being absorbed into the .find() layer + pipeline.unshift({$_internalInhibitOptimization: {}}); + const result = coll.explain().aggregate(pipeline); assert.commandWorked(result); + return getExplainPipelineFromAggregationResult(db, result); +} + +function getExplainPipelineFromAggregationResult(db, result) { // We proceed by cases based on topology. if (!FixtureHelpers.isMongos(db)) { assert(Array.isArray(result.stages), result); diff --git a/jstests/aggregation/sources/densify/densify_sort_optimization.js b/jstests/aggregation/sources/densify/densify_sort_optimization.js new file mode 100644 index 00000000000..ddc18ede260 --- /dev/null +++ b/jstests/aggregation/sources/densify/densify_sort_optimization.js @@ -0,0 +1,293 @@ +/** + * Test that $densify can combine sort stages. + * @tags: [ + * requires_fcv_53, + * requires_pipeline_optimization, + * do_not_wrap_aggregations_in_facets, + * ] + */ + +(function() { +"use strict"; + +load("jstests/libs/fixture_helpers.js"); +load("jstests/libs/feature_flag_util.js"); // For isEnabled. +load("jstests/aggregation/extras/utils.js"); // For getExplainedPipelineFromAggregation. + +const coll = db[jsTestName()]; +coll.drop(); + +const documents = [ + {_id: 0, val: 0}, + {_id: 1}, +]; + +assert.commandWorked(coll.insert(documents)); +const testCases = [ + // If there are no partitions densify can combine a smaller or equal sort. + [ + [{$densify: {field: "val", range: {step: 1, bounds: "full"}}}, {$sort: {val: 1}}], + [ + {"$sort": {"sortKey": {"val": 1}}}, + { + "$_internalDensify": { + "field": "val", + "partitionByFields": [], + "range": {"step": 1, "bounds": "full"} + } + }, + ] + ], // 0 + [ + [{$densify: {field: "val", range: {step: 1, bounds: "full"}}}, {$sort: {other: 1, val: 1}}], + [ + { + "$sort": { + "sortKey": { + "val": 1, + } + } + }, + { + "$_internalDensify": { + "field": "val", + "partitionByFields": [], + "range": {"step": 1, "bounds": "full"} + } + }, + { + "$sort": { + "sortKey": { + "other": 1, + "val": 1, + } + } + }, + ] + ], // 1 + [ + [{$densify: {field: "val", range: {step: 1, bounds: [-10, 10]}}}, {$sort: {val: 1}}], + [ + { + "$sort": { + "sortKey": { + "val": 1, + } + } + }, + { + "$_internalDensify": { + "field": "val", + "partitionByFields": [], + "range": {"step": 1, "bounds": [-10, 10]} + } + }, + ] + ], // 2 + // With partitions and range: "full" sorts cannot be combined. + [ + [ + { + $densify: + {field: "val", range: {step: 1, bounds: "full"}, partitionByFields: ["random"]} + }, + {$sort: {val: 1}} + ], + [ + {"$sort": {"sortKey": {"val": 1}}}, + { + "$_internalDensify": { + "field": "val", + "partitionByFields": ["random"], + "range": {"step": 1, "bounds": "full"} + } + }, + {"$sort": {"sortKey": {"val": 1}}} + ] + ], // 3 + + // Partitions with non-full bounds means the generated sort order is preserved. Combine if the + // sorts match. + [ + [ + { + $densify: { + field: "val", + range: {step: 1, bounds: "partition"}, + partitionByFields: ["random"] + } + }, + {$sort: {random: 1, val: 1}} + ], + [ + {"$sort": {"sortKey": {"random": 1, "val": 1}}}, + { + "$_internalDensify": { + "field": "val", + "partitionByFields": ["random"], + "range": {"step": 1, "bounds": "partition"} + } + }, + ] + ], // 4 + [ + [ + { + $densify: { + field: "val", + range: {step: 1, bounds: "partition"}, + partitionByFields: ["random"] + } + }, + {$sort: {other: 1, val: 1}} + ], + [ + {"$sort": {"sortKey": {"random": 1, "val": 1}}}, + { + "$_internalDensify": { + "field": "val", + "partitionByFields": ["random"], + "range": {"step": 1, "bounds": "partition"} + } + }, + {"$sort": {"sortKey": {"other": 1, "val": 1}}} + ] + ], // 5 + [ + [ + { + $densify: { + field: "val", + range: {step: 1, bounds: [-10, 10]}, + partitionByFields: ["random"] + } + }, + {$sort: {random: 1, val: 1}}, + ], + [ + {"$sort": {"sortKey": {"random": 1, "val": 1}}}, + { + "$_internalDensify": { + "field": "val", + "partitionByFields": ["random"], + "range": {"step": 1, "bounds": [-10, 10]} + } + }, + ] + ], // 6 + [ + [ + { + $densify: { + field: "val", + range: {step: 1, bounds: [-10, 10]}, + partitionByFields: ["random"] + } + }, + {$sort: {other: 1, val: 1}} + ], + [ + {"$sort": {"sortKey": {"random": 1, "val": 1}}}, + { + "$_internalDensify": { + "field": "val", + "partitionByFields": ["random"], + "range": {"step": 1, "bounds": [-10, 10]} + } + }, + {"$sort": {"sortKey": {"other": 1, "val": 1}}} + ] + ], // 7 + // Test that a following, stricter, sort is preserved and not combined. + [ + [ + { + $densify: { + field: "val", + range: {step: 1, bounds: "partition"}, + partitionByFields: ["random"] + } + }, + {$sort: {random: 1, val: 1, _id: 1}} + ], + [ + {"$sort": {"sortKey": {"random": 1, "val": 1}}}, + { + "$_internalDensify": { + "field": "val", + "partitionByFields": ["random"], + "range": {"step": 1, "bounds": "partition"} + } + }, + {"$sort": {"sortKey": {"random": 1, "val": 1, "_id": 1}}}, + ] + ], // 8 + // Demonstrate that multiple stages that combine sorts may still wind up with an extra sort at + // the end. + [ + [ + { + $densify: { + field: "val", + range: {step: 1, bounds: "partition"}, + partitionByFields: ["random"] + } + }, + { + $setWindowFields: + {partitionBy: "$random", sortBy: {"val": 1}, output: {val: {$sum: "$val"}}} + }, + {$sort: {random: 1, val: 1}} + ], + [ + {"$sort": {"sortKey": {"random": 1, "val": 1}}}, + { + "$_internalDensify": { + "field": "val", + "partitionByFields": ["random"], + "range": {"step": 1, "bounds": "partition"} + } + }, + { + "$_internalSetWindowFields": { + "partitionBy": "$random", + "sortBy": {"val": 1}, + "output": { + "val": + {"$sum": "$val", "window": {"documents": ["unbounded", "unbounded"]}} + } + } + }, + {$sort: {sortKey: {random: 1, val: 1}}} + ] + ], // 9 + // Test that if the densify generated sort is preceded by an additional sort, we optimize based + // on the densify sort not the preceding one. + [ + [ + {$sort: {val: 1, other: 1}}, + {$densify: {field: "val", range: {step: 1, bounds: "full"}}}, + {$sort: {val: 1, other: 1}} + ], + [ + {"$sort": {"sortKey": {"val": 1}}}, + { + "$_internalDensify": { + "field": "val", + "partitionByFields": [], + "range": {"step": 1, "bounds": "full"} + } + }, + {"$sort": {"sortKey": {"val": 1, "other": 1}}}, + ] + + ], // 10 +]; +for (let i = 0; i < testCases.length; i++) { + let result = getExplainedPipelineFromAggregation(db, coll, testCases[i][0]); + + assert(anyEq(result, testCases[i][1]), + "Test case " + i + " failed.\n" + + "Expected:\n" + tojson(testCases[i][1]) + "\nGot:\n" + tojson(result)); +} +})(); diff --git a/src/mongo/db/pipeline/document_source_densify.cpp b/src/mongo/db/pipeline/document_source_densify.cpp index 11a4c973675..1caae5dc47b 100644 --- a/src/mongo/db/pipeline/document_source_densify.cpp +++ b/src/mongo/db/pipeline/document_source_densify.cpp @@ -28,8 +28,10 @@ */ #include "mongo/db/pipeline/document_source_densify.h" +#include "mongo/base/exact_cast.h" #include "mongo/db/pipeline/document_source_sort.h" #include "mongo/db/pipeline/field_path.h" +#include "mongo/db/query/sort_pattern.h" #include "mongo/stdx/variant.h" #include "mongo/util/assert_util.h" #include "mongo/util/visit_helper.h" @@ -174,6 +176,27 @@ list> createFromBson(BSONElement elem, return createFromBsonInternal(elem, expCtx, kStageName, false); } +SortPattern getSortPatternForDensify(RangeStatement rangeStatement, + list partitions, + FieldPath field) { + // Add partition fields to sort spec. + std::vector sortParts; + // We do not add partitions to the sort spec if the range is "full". + if (!stdx::holds_alternative(rangeStatement.getBounds())) { + for (auto partition : partitions) { + SortPatternPart part; + part.fieldPath = partition.fullPath(); + sortParts.push_back(std::move(part)); + } + } + + // Add field path to sort spec. + SortPatternPart part; + part.fieldPath = field.fullPath(); + sortParts.push_back(std::move(part)); + return SortPattern{sortParts}; +} + list> create(const intrusive_ptr& expCtx, list partitions, FieldPath field, @@ -184,24 +207,9 @@ list> create(const intrusive_ptr sortParts; - // We do not add partitions to the sort spec if the range is "full". - if (!stdx::holds_alternative(rangeStatement.getBounds())) { - for (auto partition : partitions) { - SortPatternPart part; - part.fieldPath = partition.fullPath(); - sortParts.push_back(std::move(part)); - } - } - - // Add field path to sort spec. - SortPatternPart part; - part.fieldPath = field.fullPath(); - sortParts.push_back(std::move(part)); - + auto sortPattern = getSortPatternForDensify(rangeStatement, partitions, field); // Constructing resulting stages. - results.push_back(DocumentSourceSort::create(expCtx, SortPattern{sortParts})); + results.push_back(DocumentSourceSort::create(expCtx, sortPattern)); } // Constructing resulting stages. @@ -929,4 +937,66 @@ bool DensifyValue::isOnStepRelativeTo(DensifyValue base, RangeStatement range) c }}, _value); } + +Pipeline::SourceContainer::iterator DocumentSourceInternalDensify::combineSorts( + Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { + if (std::next(itr) == container->end() || itr == container->begin()) { + return container->end(); + } + + // We can only combine the sorts if we can guarantee the output order will maintain the + // sort. Densify changes the sort order if partitions are present and range is type 'full'. + if (_partitions.size() != 0 && stdx::holds_alternative(_range.getBounds())) { + // We will not maintain sort order. + return std::next(itr); + } + + // If $densify was the first stage in the pipeline, there should be a preceding sort. + tassert(6059802, "$_internalDensify did not have a preceding stage", itr != container->begin()); + // Get the spec of the preceding sort stage. Densify always has a preceding sort, unless + // the preceding sort was already removed by an earlier stage. + const auto preSortItr = std::prev(itr); + const auto preSortStage = dynamic_cast((*preSortItr).get()); + if (!preSortStage || preSortStage->getLimit()) { + return std::next(itr); + } + + // Check that the preceding sort was actually generated by $densify, and not by combining the + // generated sort with a sort earlier in the pipeline. + auto densifySortPattern = + document_source_densify::getSortPatternForDensify(_range, _partitions, _field); + + auto preDensifySortPattern = preSortStage->getSortKeyPattern(); + if (densifySortPattern != preDensifySortPattern) { + return std::next(itr); + } + + // Get the spec of the following sort stage, if it exists. + const auto postSortItr = std::next(itr); + const auto postSortStage = dynamic_cast((*postSortItr).get()); + if (!postSortStage || postSortStage->getLimit()) { + // If there is not a following sort stage, we won't do any optimization. Return the next + // stage in the pipeline. + return std::next(itr); + } + auto postDensifySortPattern = postSortStage->getSortKeyPattern(); + + // We can only combine the sorts if the sorts are compatible. $densify only preserves a sort on + // the fields on which it operates, as any other fields will be missing in generated documents. + if (!preDensifySortPattern.isExtensionOf(postDensifySortPattern)) { + return std::next(itr); + } + + // If the post sort is longer, we would have bailed earlier. Remove the sort after the $densify. + container->erase(postSortItr); + + return std::prev(itr); +} + +Pipeline::SourceContainer::iterator DocumentSourceInternalDensify::doOptimizeAt( + Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { + tassert(6059800, "Expected to optimize $densify stage", *itr == this); + + return combineSorts(itr, container); +} } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_densify.h b/src/mongo/db/pipeline/document_source_densify.h index 24c44b52922..c7667d21f2c 100644 --- a/src/mongo/db/pipeline/document_source_densify.h +++ b/src/mongo/db/pipeline/document_source_densify.h @@ -392,6 +392,9 @@ public: GetNextResult doGetNext() final; +protected: + Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) final; private: enum class ValComparedToRange { @@ -531,6 +534,10 @@ private: createDocGenerator(min, range, boost::none, boost::none); } + + Pipeline::SourceContainer::iterator combineSorts(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container); + boost::optional _docGenerator = boost::none; /** diff --git a/src/mongo/db/query/sort_pattern.cpp b/src/mongo/db/query/sort_pattern.cpp index 29b58a03322..fcd3cd177e1 100644 --- a/src/mongo/db/query/sort_pattern.cpp +++ b/src/mongo/db/query/sort_pattern.cpp @@ -151,4 +151,18 @@ void SortPattern::addDependencies(DepsTracker* deps) const { } } } + +bool SortPattern::isExtensionOf(const SortPattern& other) const { + // If the other is longer, this cannot be an extension of it. + if (_sortPattern.size() < other._sortPattern.size()) { + return false; + } + // For each sortPatternPart in the other sort pattern, make sure we have it as well in order. + for (unsigned int i = 0; i < other._sortPattern.size(); ++i) { + if (_sortPattern[i] != other._sortPattern[i]) { + return false; + } + } + return true; +} } // namespace mongo diff --git a/src/mongo/db/query/sort_pattern.h b/src/mongo/db/query/sort_pattern.h index 1b55472a79b..bed9b338535 100644 --- a/src/mongo/db/query/sort_pattern.h +++ b/src/mongo/db/query/sort_pattern.h @@ -105,10 +105,19 @@ public: return _sortPattern[idx]; } + /** + * Returns true if this SortPattern is an extension of the other. + */ + bool isExtensionOf(const SortPattern& other) const; + bool operator==(const SortPattern& other) const { return _sortPattern == other._sortPattern && _paths == other._paths; } + bool operator!=(const SortPattern& other) const { + return !(*this == other); + } + std::vector::const_iterator begin() const { return _sortPattern.cbegin(); } -- cgit v1.2.1