diff options
-rw-r--r-- | src/mongo/db/matcher/expression_algo.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_algo.h | 6 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_unwind.cpp | 35 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_unwind.h | 7 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_test.cpp | 189 |
5 files changed, 242 insertions, 0 deletions
diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp index 11a280eb9ad..ca4ba9c68bf 100644 --- a/src/mongo/db/matcher/expression_algo.cpp +++ b/src/mongo/db/matcher/expression_algo.cpp @@ -503,5 +503,10 @@ bool isPathPrefixOf(StringData first, StringData second) { return second.startsWith(first) && second[first.size()] == '.'; } + +bool bidirectionalPathPrefixOf(StringData first, StringData second) { + return first == second || expression::isPathPrefixOf(first, second) || + expression::isPathPrefixOf(second, first); +} } // namespace expression } // namespace mongo diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h index 5373549f922..fb703066206 100644 --- a/src/mongo/db/matcher/expression_algo.h +++ b/src/mongo/db/matcher/expression_algo.h @@ -110,6 +110,12 @@ bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>& bool isPathPrefixOf(StringData first, StringData second); /** + * Returns true if the first path is equal to the second path or if either is a prefix + * of the other. + */ +bool bidirectionalPathPrefixOf(StringData first, StringData second); + +/** * Applies 'func' to each node of 'expr', where the first argument is a pointer to that actual node * (not a copy), and the second argument is the path to that node. Callers should not depend on the * order of the traversal of the nodes. diff --git a/src/mongo/db/pipeline/document_source_unwind.cpp b/src/mongo/db/pipeline/document_source_unwind.cpp index b08e6f7aec3..ae0bfecbb9d 100644 --- a/src/mongo/db/pipeline/document_source_unwind.cpp +++ b/src/mongo/db/pipeline/document_source_unwind.cpp @@ -34,6 +34,8 @@ #include "mongo/db/exec/document_value/document.h" #include "mongo/db/exec/document_value/value.h" #include "mongo/db/jsobj.h" +#include "mongo/db/matcher/expression_algo.h" +#include "mongo/db/pipeline/document_source_sort.h" #include "mongo/db/pipeline/expression.h" #include "mongo/db/pipeline/lite_parsed_document_source.h" @@ -214,6 +216,39 @@ DocumentSource::GetModPathsReturn DocumentSourceUnwind::getModifiedPaths() const return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedFields), {}}; } +Pipeline::SourceContainer::iterator DocumentSourceUnwind::doOptimizeAt( + Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { + tassert(5482200, "DocumentSourceUnwind: itr must point to this object", *itr == this); + + if (std::next(itr) == container->end()) { + return container->end(); + } + + // If the following stage is $sort (on a different field), push before $unwind. + auto nextSort = dynamic_cast<DocumentSourceSort*>(std::next(itr)->get()); + if (nextSort) { + auto unwindPath = _unwindPath.fullPath(); + + // Checks if any of the $sort's paths depend on the unwind path (or vice versa). + SortPattern sortKeyPattern = nextSort->getSortKeyPattern(); + bool sortPathMatchesUnwindPath = + std::any_of(sortKeyPattern.begin(), sortKeyPattern.end(), [&](auto& sortKey) { + // If 'sortKey' is a $meta expression, we can do the swap. + if (!sortKey.fieldPath) + return false; + return expression::bidirectionalPathPrefixOf(unwindPath, + sortKey.fieldPath->fullPath()); + }); + + if (!sortPathMatchesUnwindPath) { + std::swap(*itr, *std::next(itr)); + return itr == container->begin() ? itr : std::prev(itr); + } + } + + return std::next(itr); +} + Value DocumentSourceUnwind::serialize(boost::optional<ExplainOptions::Verbosity> explain) const { return Value(DOC(getSourceName() << DOC( "path" << _unwindPath.fullPathWithPrefix() << "preserveNullAndEmptyArrays" diff --git a/src/mongo/db/pipeline/document_source_unwind.h b/src/mongo/db/pipeline/document_source_unwind.h index de7269d7639..9d825b213ed 100644 --- a/src/mongo/db/pipeline/document_source_unwind.h +++ b/src/mongo/db/pipeline/document_source_unwind.h @@ -92,6 +92,13 @@ public: return _indexPath; } +protected: + /** + * Attempts to swap with a subsequent $sort stage if the $sort is on a different field. + */ + Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) final; + private: DocumentSourceUnwind(const boost::intrusive_ptr<ExpressionContext>& pExpCtx, const FieldPath& fieldPath, diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp index ab92b6fb9ff..37586a16abd 100644 --- a/src/mongo/db/pipeline/pipeline_test.cpp +++ b/src/mongo/db/pipeline/pipeline_test.cpp @@ -363,6 +363,195 @@ TEST(PipelineOptimizationTest, LimitDoesNotSwapBeforeSkipWithoutSort) { assertPipelineOptimizesTo(inputPipe, outputPipe); } +TEST(PipelineOptimizationTest, SortSwapsBeforeUnwind) { + std::string inputPipe = + "[{$unwind : {path: '$a'}}" + ",{$sort : {b: 1}}" + "]"; + std::string outputPipe = + "[{$sort : {sortKey: {b: 1}}}" + ",{$unwind : {path: '$a'}}" + "]"; + std::string serializedPipe = + "[{$sort : {b: 1}}" + ",{$unwind : {path: '$a'}}" + "]"; + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SortSwapsBeforeUnwindMultipleSorts) { + std::string inputPipe = + "[{$unwind : {path: '$a'}}" + ",{$sort : {b: 1}}" + ",{$sort : {c: 1}}" + "]"; + std::string outputPipe = + "[{$sort : {sortKey: {c: 1}}}" + ",{$unwind : {path: '$a'}}" + "]"; + std::string serializedPipe = + "[{$sort : {c: 1}}" + ",{$unwind : {path: '$a'}}" + "]"; + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SortSwapsBeforeUnwindDifferentDotPaths) { + std::string inputPipe = + "[{$unwind : {path: '$a.b'}}" + ",{$sort : {'a.c': 1}}" + "]"; + std::string outputPipe = + "[{$sort : {sortKey: {'a.c': 1}}}" + ",{$unwind : {path: '$a.b'}}" + "]"; + std::string serializedPipe = + "[{$sort : {'a.c': 1}}" + ",{$unwind : {path: '$a.b'}}" + "]"; + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SortSwapsBeforeUnwindMultipleSortPaths) { + std::string inputPipe = + "[{$unwind : {path: '$a'}}" + ",{$sort : {b: 1, c: 1}}" + "]"; + std::string outputPipe = + "[{$sort : {sortKey: {b: 1, c: 1}}}" + ",{$unwind : {path: '$a'}}" + "]"; + std::string serializedPipe = + "[{$sort : {b: 1, c: 1}}" + ",{$unwind : {path: '$a'}}" + "]"; + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SortDoesNotSwapBeforeUnwindMultipleSortPaths) { + std::string inputPipe = + "[{$unwind : {path: '$a'}}" + ",{$sort : {b: 1, a: 1}}" + "]"; + std::string outputPipe = + "[{$unwind : {path: '$a'}}" + ",{$sort : {sortKey: {b: 1, a: 1}}}" + "]"; + std::string serializedPipe = + "[{$unwind : {path: '$a'}}" + ",{$sort : {b: 1, a: 1}}" + "]"; + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SortDoesNotSwapBeforeUnwindBecauseSortPathPrefixOfUnwindPath) { + std::string inputPipe = + "[{$unwind : {path: '$b.a'}}" + ",{$sort : {b: 1}}" + "]"; + std::string outputPipe = + "[{$unwind : {path: '$b.a'}}" + ",{$sort : {sortKey: {b: 1}}}" + "]"; + std::string serializedPipe = + "[{$unwind : {path: '$b.a'}}" + ",{$sort : {b: 1}}" + "]"; + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SortDoesNotSwapBeforeUnwindBecauseUnwindPathPrefixOfSortPath) { + std::string inputPipe = + "[{$unwind : {path: '$b'}}" + ",{$sort : {'b.a': 1}}" + "]"; + std::string outputPipe = + "[{$unwind : {path: '$b'}}" + ",{$sort : {sortKey: {'b.a': 1}}}" + "]"; + std::string serializedPipe = + "[{$unwind : {path: '$b'}}" + ",{$sort : {'b.a': 1}}" + "]"; + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SortDoesNotSwapBeforeUnwindBecauseUnwindPathEqualToSortPath) { + std::string inputPipe = + "[{$unwind : {path: '$a.b'}}" + ",{$sort : {'a.b': 1}}" + "]"; + std::string outputPipe = + "[{$unwind : {path: '$a.b'}}" + ",{$sort : {sortKey: {'a.b': 1}}}" + "]"; + std::string serializedPipe = + "[{$unwind : {path: '$a.b'}}" + ",{$sort : {'a.b': 1}}" + "]"; + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, LookupShouldCoalesceWithUnwindOnAsSortDoesNotInterfere) { + string inputPipe = + "[{$lookup: {from : 'lookupColl', as : 'same', localField: 'left', foreignField: " + "'right'}}" + ",{$unwind: {path: '$same'}}" + ",{$sort : {'a.b': 1}}" + "]"; + string outputPipe = + "[{$lookup: {from : 'lookupColl', as : 'same', localField: 'left', foreignField: " + "'right', unwinding: {preserveNullAndEmptyArrays: false}}}" + ",{$sort : {sortKey: {'a.b': 1}}}]"; + string serializedPipe = + "[{$lookup: {from : 'lookupColl', as : 'same', localField: 'left', foreignField: " + "'right'}}" + ",{$unwind: {path: '$same'}}" + ",{$sort : {'a.b': 1}}" + "]"; + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SortSwapsBeforeUnwindMetaWithFieldPath) { + std::string inputPipe = + "[{ $match: { $text: { $search: \"operating\" } }}" + ",{$unwind : {path: '$a'}}" + ",{$sort : {score: {$meta: \"textScore\"}, c: 1}}" + "]"; + std::string outputPipe = + "[{$match: {$text: {$search: \"operating\", $language: \"\", $caseSensitive: false, " + "$diacriticSensitive: false}}}" + ",{$sort: {sortKey: {$computed0: {$meta: \"textScore\"}, c: 1}}}" + ",{$unwind : {path: '$a'}}" + "]"; + std::string serializedPipe = + "[{ $match: { $text: { $search: \"operating\" } }}" + ",{$sort: {$computed0: {$meta: \"textScore\"}, c: 1}}" + ",{$unwind : {path: '$a'}}" + "]"; + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, SortSwapsBeforeUnwindMetaWithoutFieldPath) { + std::string inputPipe = + "[{ $match: { $text: { $search: \"operating\" } }}" + ",{$unwind : {path: '$a'}}" + ",{$sort : {score: {$meta: \"textScore\"}}}" + "]"; + std::string outputPipe = + "[{$match: {$text: {$search: \"operating\", $language: \"\", $caseSensitive: false, " + "$diacriticSensitive: false}}}" + ",{$sort: {sortKey: {$computed0: {$meta: \"textScore\"}}}}" + ",{$unwind : {path: '$a'}}" + "]"; + std::string serializedPipe = + "[{ $match: { $text: { $search: \"operating\" } }}" + ",{$sort: {$computed0: {$meta: \"textScore\"}}}" + ",{$unwind : {path: '$a'}}" + "]"; + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + TEST(PipelineOptimizationTest, SortMatchProjSkipLimBecomesMatchTopKSortSkipProj) { std::string inputPipe = "[{$sort: {a: 1}}" |