summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTed Tuckman <ted.tuckman@mongodb.com>2022-01-05 19:16:23 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-01-05 20:08:03 +0000
commite633ee07371dd276469766698d40f44f930b17ba (patch)
treec21a0c2db98cd371e01831cdda4f50170907ec71
parente2d48665f771a82bfe7de2a112276cd3692a6007 (diff)
downloadmongo-e633ee07371dd276469766698d40f44f930b17ba.tar.gz
SERVER-60598 Optimize multiple sorts in pipeline with $densify
-rw-r--r--jstests/aggregation/extras/utils.js20
-rw-r--r--jstests/aggregation/sources/densify/densify_sort_optimization.js293
-rw-r--r--src/mongo/db/pipeline/document_source_densify.cpp104
-rw-r--r--src/mongo/db/pipeline/document_source_densify.h7
-rw-r--r--src/mongo/db/query/sort_pattern.cpp14
-rw-r--r--src/mongo/db/query/sort_pattern.h9
6 files changed, 425 insertions, 22 deletions
diff --git a/jstests/aggregation/extras/utils.js b/jstests/aggregation/extras/utils.js
index 7ea8398ebec..626a7a6939b 100644
--- a/jstests/aggregation/extras/utils.js
+++ b/jstests/aggregation/extras/utils.js
@@ -432,13 +432,23 @@ function collectionExists(coll) {
* pipeline from the explain results regardless of cluster topology.
*/
function desugarSingleStageAggregation(db, coll, stage) {
- const result = coll.explain().aggregate([
- // prevent stages from being absorbed into the .find() layer
- {$_internalInhibitOptimization: {}},
- stage,
- ]);
+ return getExplainedPipelineFromAggregation(db, coll, [stage]);
+}
+
+/**
+ * Runs and asserts an explain command for an aggregation with the given pipeline. Returns just the
+ * pipeline from the explain results regardless of cluster topology.
+ */
+function getExplainedPipelineFromAggregation(db, coll, pipeline) {
+ // Prevent stages from being absorbed into the .find() layer
+ pipeline.unshift({$_internalInhibitOptimization: {}});
+ const result = coll.explain().aggregate(pipeline);
assert.commandWorked(result);
+ return getExplainPipelineFromAggregationResult(db, result);
+}
+
+function getExplainPipelineFromAggregationResult(db, result) {
// We proceed by cases based on topology.
if (!FixtureHelpers.isMongos(db)) {
assert(Array.isArray(result.stages), result);
diff --git a/jstests/aggregation/sources/densify/densify_sort_optimization.js b/jstests/aggregation/sources/densify/densify_sort_optimization.js
new file mode 100644
index 00000000000..ddc18ede260
--- /dev/null
+++ b/jstests/aggregation/sources/densify/densify_sort_optimization.js
@@ -0,0 +1,293 @@
+/**
+ * Test that $densify can combine sort stages.
+ * @tags: [
+ * requires_fcv_53,
+ * requires_pipeline_optimization,
+ * do_not_wrap_aggregations_in_facets,
+ * ]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fixture_helpers.js");
+load("jstests/libs/feature_flag_util.js"); // For isEnabled.
+load("jstests/aggregation/extras/utils.js"); // For getExplainedPipelineFromAggregation.
+
+const coll = db[jsTestName()];
+coll.drop();
+
+const documents = [
+ {_id: 0, val: 0},
+ {_id: 1},
+];
+
+assert.commandWorked(coll.insert(documents));
+const testCases = [
+ // If there are no partitions densify can combine a smaller or equal sort.
+ [
+ [{$densify: {field: "val", range: {step: 1, bounds: "full"}}}, {$sort: {val: 1}}],
+ [
+ {"$sort": {"sortKey": {"val": 1}}},
+ {
+ "$_internalDensify": {
+ "field": "val",
+ "partitionByFields": [],
+ "range": {"step": 1, "bounds": "full"}
+ }
+ },
+ ]
+ ], // 0
+ [
+ [{$densify: {field: "val", range: {step: 1, bounds: "full"}}}, {$sort: {other: 1, val: 1}}],
+ [
+ {
+ "$sort": {
+ "sortKey": {
+ "val": 1,
+ }
+ }
+ },
+ {
+ "$_internalDensify": {
+ "field": "val",
+ "partitionByFields": [],
+ "range": {"step": 1, "bounds": "full"}
+ }
+ },
+ {
+ "$sort": {
+ "sortKey": {
+ "other": 1,
+ "val": 1,
+ }
+ }
+ },
+ ]
+ ], // 1
+ [
+ [{$densify: {field: "val", range: {step: 1, bounds: [-10, 10]}}}, {$sort: {val: 1}}],
+ [
+ {
+ "$sort": {
+ "sortKey": {
+ "val": 1,
+ }
+ }
+ },
+ {
+ "$_internalDensify": {
+ "field": "val",
+ "partitionByFields": [],
+ "range": {"step": 1, "bounds": [-10, 10]}
+ }
+ },
+ ]
+ ], // 2
+ // With partitions and range: "full" sorts cannot be combined.
+ [
+ [
+ {
+ $densify:
+ {field: "val", range: {step: 1, bounds: "full"}, partitionByFields: ["random"]}
+ },
+ {$sort: {val: 1}}
+ ],
+ [
+ {"$sort": {"sortKey": {"val": 1}}},
+ {
+ "$_internalDensify": {
+ "field": "val",
+ "partitionByFields": ["random"],
+ "range": {"step": 1, "bounds": "full"}
+ }
+ },
+ {"$sort": {"sortKey": {"val": 1}}}
+ ]
+ ], // 3
+
+ // Partitions with non-full bounds means the generated sort order is preserved. Combine if the
+ // sorts match.
+ [
+ [
+ {
+ $densify: {
+ field: "val",
+ range: {step: 1, bounds: "partition"},
+ partitionByFields: ["random"]
+ }
+ },
+ {$sort: {random: 1, val: 1}}
+ ],
+ [
+ {"$sort": {"sortKey": {"random": 1, "val": 1}}},
+ {
+ "$_internalDensify": {
+ "field": "val",
+ "partitionByFields": ["random"],
+ "range": {"step": 1, "bounds": "partition"}
+ }
+ },
+ ]
+ ], // 4
+ [
+ [
+ {
+ $densify: {
+ field: "val",
+ range: {step: 1, bounds: "partition"},
+ partitionByFields: ["random"]
+ }
+ },
+ {$sort: {other: 1, val: 1}}
+ ],
+ [
+ {"$sort": {"sortKey": {"random": 1, "val": 1}}},
+ {
+ "$_internalDensify": {
+ "field": "val",
+ "partitionByFields": ["random"],
+ "range": {"step": 1, "bounds": "partition"}
+ }
+ },
+ {"$sort": {"sortKey": {"other": 1, "val": 1}}}
+ ]
+ ], // 5
+ [
+ [
+ {
+ $densify: {
+ field: "val",
+ range: {step: 1, bounds: [-10, 10]},
+ partitionByFields: ["random"]
+ }
+ },
+ {$sort: {random: 1, val: 1}},
+ ],
+ [
+ {"$sort": {"sortKey": {"random": 1, "val": 1}}},
+ {
+ "$_internalDensify": {
+ "field": "val",
+ "partitionByFields": ["random"],
+ "range": {"step": 1, "bounds": [-10, 10]}
+ }
+ },
+ ]
+ ], // 6
+ [
+ [
+ {
+ $densify: {
+ field: "val",
+ range: {step: 1, bounds: [-10, 10]},
+ partitionByFields: ["random"]
+ }
+ },
+ {$sort: {other: 1, val: 1}}
+ ],
+ [
+ {"$sort": {"sortKey": {"random": 1, "val": 1}}},
+ {
+ "$_internalDensify": {
+ "field": "val",
+ "partitionByFields": ["random"],
+ "range": {"step": 1, "bounds": [-10, 10]}
+ }
+ },
+ {"$sort": {"sortKey": {"other": 1, "val": 1}}}
+ ]
+ ], // 7
+ // Test that a following, stricter, sort is preserved and not combined.
+ [
+ [
+ {
+ $densify: {
+ field: "val",
+ range: {step: 1, bounds: "partition"},
+ partitionByFields: ["random"]
+ }
+ },
+ {$sort: {random: 1, val: 1, _id: 1}}
+ ],
+ [
+ {"$sort": {"sortKey": {"random": 1, "val": 1}}},
+ {
+ "$_internalDensify": {
+ "field": "val",
+ "partitionByFields": ["random"],
+ "range": {"step": 1, "bounds": "partition"}
+ }
+ },
+ {"$sort": {"sortKey": {"random": 1, "val": 1, "_id": 1}}},
+ ]
+ ], // 8
+ // Demonstrate that multiple stages that combine sorts may still wind up with an extra sort at
+ // the end.
+ [
+ [
+ {
+ $densify: {
+ field: "val",
+ range: {step: 1, bounds: "partition"},
+ partitionByFields: ["random"]
+ }
+ },
+ {
+ $setWindowFields:
+ {partitionBy: "$random", sortBy: {"val": 1}, output: {val: {$sum: "$val"}}}
+ },
+ {$sort: {random: 1, val: 1}}
+ ],
+ [
+ {"$sort": {"sortKey": {"random": 1, "val": 1}}},
+ {
+ "$_internalDensify": {
+ "field": "val",
+ "partitionByFields": ["random"],
+ "range": {"step": 1, "bounds": "partition"}
+ }
+ },
+ {
+ "$_internalSetWindowFields": {
+ "partitionBy": "$random",
+ "sortBy": {"val": 1},
+ "output": {
+ "val":
+ {"$sum": "$val", "window": {"documents": ["unbounded", "unbounded"]}}
+ }
+ }
+ },
+ {$sort: {sortKey: {random: 1, val: 1}}}
+ ]
+ ], // 9
+ // Test that if the densify generated sort is preceded by an additional sort, we optimize based
+ // on the densify sort not the preceding one.
+ [
+ [
+ {$sort: {val: 1, other: 1}},
+ {$densify: {field: "val", range: {step: 1, bounds: "full"}}},
+ {$sort: {val: 1, other: 1}}
+ ],
+ [
+ {"$sort": {"sortKey": {"val": 1}}},
+ {
+ "$_internalDensify": {
+ "field": "val",
+ "partitionByFields": [],
+ "range": {"step": 1, "bounds": "full"}
+ }
+ },
+ {"$sort": {"sortKey": {"val": 1, "other": 1}}},
+ ]
+
+ ], // 10
+];
+for (let i = 0; i < testCases.length; i++) {
+ let result = getExplainedPipelineFromAggregation(db, coll, testCases[i][0]);
+
+ assert(anyEq(result, testCases[i][1]),
+ "Test case " + i + " failed.\n" +
+ "Expected:\n" + tojson(testCases[i][1]) + "\nGot:\n" + tojson(result));
+}
+})();
diff --git a/src/mongo/db/pipeline/document_source_densify.cpp b/src/mongo/db/pipeline/document_source_densify.cpp
index 11a4c973675..1caae5dc47b 100644
--- a/src/mongo/db/pipeline/document_source_densify.cpp
+++ b/src/mongo/db/pipeline/document_source_densify.cpp
@@ -28,8 +28,10 @@
*/
#include "mongo/db/pipeline/document_source_densify.h"
+#include "mongo/base/exact_cast.h"
#include "mongo/db/pipeline/document_source_sort.h"
#include "mongo/db/pipeline/field_path.h"
+#include "mongo/db/query/sort_pattern.h"
#include "mongo/stdx/variant.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/visit_helper.h"
@@ -174,6 +176,27 @@ list<intrusive_ptr<DocumentSource>> createFromBson(BSONElement elem,
return createFromBsonInternal(elem, expCtx, kStageName, false);
}
+SortPattern getSortPatternForDensify(RangeStatement rangeStatement,
+ list<FieldPath> partitions,
+ FieldPath field) {
+ // Add partition fields to sort spec.
+ std::vector<SortPatternPart> sortParts;
+ // We do not add partitions to the sort spec if the range is "full".
+ if (!stdx::holds_alternative<Full>(rangeStatement.getBounds())) {
+ for (auto partition : partitions) {
+ SortPatternPart part;
+ part.fieldPath = partition.fullPath();
+ sortParts.push_back(std::move(part));
+ }
+ }
+
+ // Add field path to sort spec.
+ SortPatternPart part;
+ part.fieldPath = field.fullPath();
+ sortParts.push_back(std::move(part));
+ return SortPattern{sortParts};
+}
+
list<intrusive_ptr<DocumentSource>> create(const intrusive_ptr<ExpressionContext>& expCtx,
list<FieldPath> partitions,
FieldPath field,
@@ -184,24 +207,9 @@ list<intrusive_ptr<DocumentSource>> create(const intrusive_ptr<ExpressionContext
// If we're creating an internal stage then we must not desugar and produce a sort stage in
// addition.
if (!isInternal) {
- // Add partition fields to sort spec.
- std::vector<SortPatternPart> sortParts;
- // We do not add partitions to the sort spec if the range is "full".
- if (!stdx::holds_alternative<Full>(rangeStatement.getBounds())) {
- for (auto partition : partitions) {
- SortPatternPart part;
- part.fieldPath = partition.fullPath();
- sortParts.push_back(std::move(part));
- }
- }
-
- // Add field path to sort spec.
- SortPatternPart part;
- part.fieldPath = field.fullPath();
- sortParts.push_back(std::move(part));
-
+ auto sortPattern = getSortPatternForDensify(rangeStatement, partitions, field);
// Constructing resulting stages.
- results.push_back(DocumentSourceSort::create(expCtx, SortPattern{sortParts}));
+ results.push_back(DocumentSourceSort::create(expCtx, sortPattern));
}
// Constructing resulting stages.
@@ -929,4 +937,66 @@ bool DensifyValue::isOnStepRelativeTo(DensifyValue base, RangeStatement range) c
}},
_value);
}
+
+Pipeline::SourceContainer::iterator DocumentSourceInternalDensify::combineSorts(
+ Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
+ if (std::next(itr) == container->end() || itr == container->begin()) {
+ return container->end();
+ }
+
+ // We can only combine the sorts if we can guarantee the output order will maintain the
+ // sort. Densify changes the sort order if partitions are present and range is type 'full'.
+ if (_partitions.size() != 0 && stdx::holds_alternative<Full>(_range.getBounds())) {
+ // We will not maintain sort order.
+ return std::next(itr);
+ }
+
+ // If $densify was the first stage in the pipeline, there should be a preceding sort.
+ tassert(6059802, "$_internalDensify did not have a preceding stage", itr != container->begin());
+ // Get the spec of the preceding sort stage. Densify always has a preceding sort, unless
+ // the preceding sort was already removed by an earlier stage.
+ const auto preSortItr = std::prev(itr);
+ const auto preSortStage = dynamic_cast<DocumentSourceSort*>((*preSortItr).get());
+ if (!preSortStage || preSortStage->getLimit()) {
+ return std::next(itr);
+ }
+
+ // Check that the preceding sort was actually generated by $densify, and not by combining the
+ // generated sort with a sort earlier in the pipeline.
+ auto densifySortPattern =
+ document_source_densify::getSortPatternForDensify(_range, _partitions, _field);
+
+ auto preDensifySortPattern = preSortStage->getSortKeyPattern();
+ if (densifySortPattern != preDensifySortPattern) {
+ return std::next(itr);
+ }
+
+ // Get the spec of the following sort stage, if it exists.
+ const auto postSortItr = std::next(itr);
+ const auto postSortStage = dynamic_cast<DocumentSourceSort*>((*postSortItr).get());
+ if (!postSortStage || postSortStage->getLimit()) {
+ // If there is not a following sort stage, we won't do any optimization. Return the next
+ // stage in the pipeline.
+ return std::next(itr);
+ }
+ auto postDensifySortPattern = postSortStage->getSortKeyPattern();
+
+ // We can only combine the sorts if the sorts are compatible. $densify only preserves a sort on
+ // the fields on which it operates, as any other fields will be missing in generated documents.
+ if (!preDensifySortPattern.isExtensionOf(postDensifySortPattern)) {
+ return std::next(itr);
+ }
+
+ // If the post sort is longer, we would have bailed earlier. Remove the sort after the $densify.
+ container->erase(postSortItr);
+
+ return std::prev(itr);
+}
+
+Pipeline::SourceContainer::iterator DocumentSourceInternalDensify::doOptimizeAt(
+ Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
+ tassert(6059800, "Expected to optimize $densify stage", *itr == this);
+
+ return combineSorts(itr, container);
+}
} // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_densify.h b/src/mongo/db/pipeline/document_source_densify.h
index 24c44b52922..c7667d21f2c 100644
--- a/src/mongo/db/pipeline/document_source_densify.h
+++ b/src/mongo/db/pipeline/document_source_densify.h
@@ -392,6 +392,9 @@ public:
GetNextResult doGetNext() final;
+protected:
+ Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) final;
private:
enum class ValComparedToRange {
@@ -531,6 +534,10 @@ private:
createDocGenerator(min, range, boost::none, boost::none);
}
+
+ Pipeline::SourceContainer::iterator combineSorts(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container);
+
boost::optional<DocGenerator> _docGenerator = boost::none;
/**
diff --git a/src/mongo/db/query/sort_pattern.cpp b/src/mongo/db/query/sort_pattern.cpp
index 29b58a03322..fcd3cd177e1 100644
--- a/src/mongo/db/query/sort_pattern.cpp
+++ b/src/mongo/db/query/sort_pattern.cpp
@@ -151,4 +151,18 @@ void SortPattern::addDependencies(DepsTracker* deps) const {
}
}
}
+
+bool SortPattern::isExtensionOf(const SortPattern& other) const {
+ // If the other is longer, this cannot be an extension of it.
+ if (_sortPattern.size() < other._sortPattern.size()) {
+ return false;
+ }
+ // For each sortPatternPart in the other sort pattern, make sure we have it as well in order.
+ for (unsigned int i = 0; i < other._sortPattern.size(); ++i) {
+ if (_sortPattern[i] != other._sortPattern[i]) {
+ return false;
+ }
+ }
+ return true;
+}
} // namespace mongo
diff --git a/src/mongo/db/query/sort_pattern.h b/src/mongo/db/query/sort_pattern.h
index 1b55472a79b..bed9b338535 100644
--- a/src/mongo/db/query/sort_pattern.h
+++ b/src/mongo/db/query/sort_pattern.h
@@ -105,10 +105,19 @@ public:
return _sortPattern[idx];
}
+ /**
+ * Returns true if this SortPattern is an extension of the other.
+ */
+ bool isExtensionOf(const SortPattern& other) const;
+
bool operator==(const SortPattern& other) const {
return _sortPattern == other._sortPattern && _paths == other._paths;
}
+ bool operator!=(const SortPattern& other) const {
+ return !(*this == other);
+ }
+
std::vector<SortPatternPart>::const_iterator begin() const {
return _sortPattern.cbegin();
}