summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/mongo/db/matcher/expression_algo.cpp5
-rw-r--r--src/mongo/db/matcher/expression_algo.h6
-rw-r--r--src/mongo/db/pipeline/document_source_unwind.cpp35
-rw-r--r--src/mongo/db/pipeline/document_source_unwind.h7
-rw-r--r--src/mongo/db/pipeline/pipeline_test.cpp189
5 files changed, 242 insertions, 0 deletions
diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp
index 11a280eb9ad..ca4ba9c68bf 100644
--- a/src/mongo/db/matcher/expression_algo.cpp
+++ b/src/mongo/db/matcher/expression_algo.cpp
@@ -503,5 +503,10 @@ bool isPathPrefixOf(StringData first, StringData second) {
return second.startsWith(first) && second[first.size()] == '.';
}
+
+bool bidirectionalPathPrefixOf(StringData first, StringData second) {
+ return first == second || expression::isPathPrefixOf(first, second) ||
+ expression::isPathPrefixOf(second, first);
+}
} // namespace expression
} // namespace mongo
diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h
index 5373549f922..fb703066206 100644
--- a/src/mongo/db/matcher/expression_algo.h
+++ b/src/mongo/db/matcher/expression_algo.h
@@ -110,6 +110,12 @@ bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>&
bool isPathPrefixOf(StringData first, StringData second);
/**
+ * Returns true if the first path is equal to the second path or if either is a prefix
+ * of the other.
+ */
+bool bidirectionalPathPrefixOf(StringData first, StringData second);
+
+/**
* Applies 'func' to each node of 'expr', where the first argument is a pointer to that actual node
* (not a copy), and the second argument is the path to that node. Callers should not depend on the
* order of the traversal of the nodes.
diff --git a/src/mongo/db/pipeline/document_source_unwind.cpp b/src/mongo/db/pipeline/document_source_unwind.cpp
index b08e6f7aec3..ae0bfecbb9d 100644
--- a/src/mongo/db/pipeline/document_source_unwind.cpp
+++ b/src/mongo/db/pipeline/document_source_unwind.cpp
@@ -34,6 +34,8 @@
#include "mongo/db/exec/document_value/document.h"
#include "mongo/db/exec/document_value/value.h"
#include "mongo/db/jsobj.h"
+#include "mongo/db/matcher/expression_algo.h"
+#include "mongo/db/pipeline/document_source_sort.h"
#include "mongo/db/pipeline/expression.h"
#include "mongo/db/pipeline/lite_parsed_document_source.h"
@@ -214,6 +216,39 @@ DocumentSource::GetModPathsReturn DocumentSourceUnwind::getModifiedPaths() const
return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedFields), {}};
}
+Pipeline::SourceContainer::iterator DocumentSourceUnwind::doOptimizeAt(
+ Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
+ tassert(5482200, "DocumentSourceUnwind: itr must point to this object", *itr == this);
+
+ if (std::next(itr) == container->end()) {
+ return container->end();
+ }
+
+ // If the following stage is $sort (on a different field), push before $unwind.
+ auto nextSort = dynamic_cast<DocumentSourceSort*>(std::next(itr)->get());
+ if (nextSort) {
+ auto unwindPath = _unwindPath.fullPath();
+
+ // Checks if any of the $sort's paths depend on the unwind path (or vice versa).
+ SortPattern sortKeyPattern = nextSort->getSortKeyPattern();
+ bool sortPathMatchesUnwindPath =
+ std::any_of(sortKeyPattern.begin(), sortKeyPattern.end(), [&](auto& sortKey) {
+ // If 'sortKey' is a $meta expression, we can do the swap.
+ if (!sortKey.fieldPath)
+ return false;
+ return expression::bidirectionalPathPrefixOf(unwindPath,
+ sortKey.fieldPath->fullPath());
+ });
+
+ if (!sortPathMatchesUnwindPath) {
+ std::swap(*itr, *std::next(itr));
+ return itr == container->begin() ? itr : std::prev(itr);
+ }
+ }
+
+ return std::next(itr);
+}
+
Value DocumentSourceUnwind::serialize(boost::optional<ExplainOptions::Verbosity> explain) const {
return Value(DOC(getSourceName() << DOC(
"path" << _unwindPath.fullPathWithPrefix() << "preserveNullAndEmptyArrays"
diff --git a/src/mongo/db/pipeline/document_source_unwind.h b/src/mongo/db/pipeline/document_source_unwind.h
index de7269d7639..9d825b213ed 100644
--- a/src/mongo/db/pipeline/document_source_unwind.h
+++ b/src/mongo/db/pipeline/document_source_unwind.h
@@ -92,6 +92,13 @@ public:
return _indexPath;
}
+protected:
+ /**
+ * Attempts to swap with a subsequent $sort stage if the $sort is on a different field.
+ */
+ Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) final;
+
private:
DocumentSourceUnwind(const boost::intrusive_ptr<ExpressionContext>& pExpCtx,
const FieldPath& fieldPath,
diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp
index ab92b6fb9ff..37586a16abd 100644
--- a/src/mongo/db/pipeline/pipeline_test.cpp
+++ b/src/mongo/db/pipeline/pipeline_test.cpp
@@ -363,6 +363,195 @@ TEST(PipelineOptimizationTest, LimitDoesNotSwapBeforeSkipWithoutSort) {
assertPipelineOptimizesTo(inputPipe, outputPipe);
}
+TEST(PipelineOptimizationTest, SortSwapsBeforeUnwind) {
+ std::string inputPipe =
+ "[{$unwind : {path: '$a'}}"
+ ",{$sort : {b: 1}}"
+ "]";
+ std::string outputPipe =
+ "[{$sort : {sortKey: {b: 1}}}"
+ ",{$unwind : {path: '$a'}}"
+ "]";
+ std::string serializedPipe =
+ "[{$sort : {b: 1}}"
+ ",{$unwind : {path: '$a'}}"
+ "]";
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SortSwapsBeforeUnwindMultipleSorts) {
+ std::string inputPipe =
+ "[{$unwind : {path: '$a'}}"
+ ",{$sort : {b: 1}}"
+ ",{$sort : {c: 1}}"
+ "]";
+ std::string outputPipe =
+ "[{$sort : {sortKey: {c: 1}}}"
+ ",{$unwind : {path: '$a'}}"
+ "]";
+ std::string serializedPipe =
+ "[{$sort : {c: 1}}"
+ ",{$unwind : {path: '$a'}}"
+ "]";
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SortSwapsBeforeUnwindDifferentDotPaths) {
+ std::string inputPipe =
+ "[{$unwind : {path: '$a.b'}}"
+ ",{$sort : {'a.c': 1}}"
+ "]";
+ std::string outputPipe =
+ "[{$sort : {sortKey: {'a.c': 1}}}"
+ ",{$unwind : {path: '$a.b'}}"
+ "]";
+ std::string serializedPipe =
+ "[{$sort : {'a.c': 1}}"
+ ",{$unwind : {path: '$a.b'}}"
+ "]";
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SortSwapsBeforeUnwindMultipleSortPaths) {
+ std::string inputPipe =
+ "[{$unwind : {path: '$a'}}"
+ ",{$sort : {b: 1, c: 1}}"
+ "]";
+ std::string outputPipe =
+ "[{$sort : {sortKey: {b: 1, c: 1}}}"
+ ",{$unwind : {path: '$a'}}"
+ "]";
+ std::string serializedPipe =
+ "[{$sort : {b: 1, c: 1}}"
+ ",{$unwind : {path: '$a'}}"
+ "]";
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SortDoesNotSwapBeforeUnwindMultipleSortPaths) {
+ std::string inputPipe =
+ "[{$unwind : {path: '$a'}}"
+ ",{$sort : {b: 1, a: 1}}"
+ "]";
+ std::string outputPipe =
+ "[{$unwind : {path: '$a'}}"
+ ",{$sort : {sortKey: {b: 1, a: 1}}}"
+ "]";
+ std::string serializedPipe =
+ "[{$unwind : {path: '$a'}}"
+ ",{$sort : {b: 1, a: 1}}"
+ "]";
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SortDoesNotSwapBeforeUnwindBecauseSortPathPrefixOfUnwindPath) {
+ std::string inputPipe =
+ "[{$unwind : {path: '$b.a'}}"
+ ",{$sort : {b: 1}}"
+ "]";
+ std::string outputPipe =
+ "[{$unwind : {path: '$b.a'}}"
+ ",{$sort : {sortKey: {b: 1}}}"
+ "]";
+ std::string serializedPipe =
+ "[{$unwind : {path: '$b.a'}}"
+ ",{$sort : {b: 1}}"
+ "]";
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SortDoesNotSwapBeforeUnwindBecauseUnwindPathPrefixOfSortPath) {
+ std::string inputPipe =
+ "[{$unwind : {path: '$b'}}"
+ ",{$sort : {'b.a': 1}}"
+ "]";
+ std::string outputPipe =
+ "[{$unwind : {path: '$b'}}"
+ ",{$sort : {sortKey: {'b.a': 1}}}"
+ "]";
+ std::string serializedPipe =
+ "[{$unwind : {path: '$b'}}"
+ ",{$sort : {'b.a': 1}}"
+ "]";
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SortDoesNotSwapBeforeUnwindBecauseUnwindPathEqualToSortPath) {
+ std::string inputPipe =
+ "[{$unwind : {path: '$a.b'}}"
+ ",{$sort : {'a.b': 1}}"
+ "]";
+ std::string outputPipe =
+ "[{$unwind : {path: '$a.b'}}"
+ ",{$sort : {sortKey: {'a.b': 1}}}"
+ "]";
+ std::string serializedPipe =
+ "[{$unwind : {path: '$a.b'}}"
+ ",{$sort : {'a.b': 1}}"
+ "]";
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, LookupShouldCoalesceWithUnwindOnAsSortDoesNotInterfere) {
+ string inputPipe =
+ "[{$lookup: {from : 'lookupColl', as : 'same', localField: 'left', foreignField: "
+ "'right'}}"
+ ",{$unwind: {path: '$same'}}"
+ ",{$sort : {'a.b': 1}}"
+ "]";
+ string outputPipe =
+ "[{$lookup: {from : 'lookupColl', as : 'same', localField: 'left', foreignField: "
+ "'right', unwinding: {preserveNullAndEmptyArrays: false}}}"
+ ",{$sort : {sortKey: {'a.b': 1}}}]";
+ string serializedPipe =
+ "[{$lookup: {from : 'lookupColl', as : 'same', localField: 'left', foreignField: "
+ "'right'}}"
+ ",{$unwind: {path: '$same'}}"
+ ",{$sort : {'a.b': 1}}"
+ "]";
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SortSwapsBeforeUnwindMetaWithFieldPath) {
+ std::string inputPipe =
+ "[{ $match: { $text: { $search: \"operating\" } }}"
+ ",{$unwind : {path: '$a'}}"
+ ",{$sort : {score: {$meta: \"textScore\"}, c: 1}}"
+ "]";
+ std::string outputPipe =
+ "[{$match: {$text: {$search: \"operating\", $language: \"\", $caseSensitive: false, "
+ "$diacriticSensitive: false}}}"
+ ",{$sort: {sortKey: {$computed0: {$meta: \"textScore\"}, c: 1}}}"
+ ",{$unwind : {path: '$a'}}"
+ "]";
+ std::string serializedPipe =
+ "[{ $match: { $text: { $search: \"operating\" } }}"
+ ",{$sort: {$computed0: {$meta: \"textScore\"}, c: 1}}"
+ ",{$unwind : {path: '$a'}}"
+ "]";
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, SortSwapsBeforeUnwindMetaWithoutFieldPath) {
+ std::string inputPipe =
+ "[{ $match: { $text: { $search: \"operating\" } }}"
+ ",{$unwind : {path: '$a'}}"
+ ",{$sort : {score: {$meta: \"textScore\"}}}"
+ "]";
+ std::string outputPipe =
+ "[{$match: {$text: {$search: \"operating\", $language: \"\", $caseSensitive: false, "
+ "$diacriticSensitive: false}}}"
+ ",{$sort: {sortKey: {$computed0: {$meta: \"textScore\"}}}}"
+ ",{$unwind : {path: '$a'}}"
+ "]";
+ std::string serializedPipe =
+ "[{ $match: { $text: { $search: \"operating\" } }}"
+ ",{$sort: {$computed0: {$meta: \"textScore\"}}}"
+ ",{$unwind : {path: '$a'}}"
+ "]";
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
TEST(PipelineOptimizationTest, SortMatchProjSkipLimBecomesMatchTopKSortSkipProj) {
std::string inputPipe =
"[{$sort: {a: 1}}"