summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Storch <david.storch@10gen.com>2016-10-03 11:48:27 -0400
committerDavid Storch <david.storch@10gen.com>2016-10-06 11:19:24 -0400
commit4976eb401b3f16482fb35287ea60d7e8b33fe76d (patch)
tree3ab5cfaec3f76dc334b006f0b1eeb7162772d4d6
parenta2d34afd4330258e05c5ac1e9174338cfda29a10 (diff)
downloadmongo-4976eb401b3f16482fb35287ea60d7e8b33fe76d.tar.gz
SERVER-19153 push $match as far forward as possible
-rw-r--r--src/mongo/db/matcher/expression_algo.h6
-rw-r--r--src/mongo/db/matcher/expression_algo_test.cpp22
-rw-r--r--src/mongo/db/pipeline/document_source.cpp102
-rw-r--r--src/mongo/db/pipeline/document_source.h291
-rw-r--r--src/mongo/db/pipeline/document_source_cursor.cpp2
-rw-r--r--src/mongo/db/pipeline/document_source_geo_near.cpp2
-rw-r--r--src/mongo/db/pipeline/document_source_graph_lookup.cpp13
-rw-r--r--src/mongo/db/pipeline/document_source_graph_lookup_test.cpp47
-rw-r--r--src/mongo/db/pipeline/document_source_limit.cpp2
-rw-r--r--src/mongo/db/pipeline/document_source_lookup.cpp86
-rw-r--r--src/mongo/db/pipeline/document_source_lookup_test.cpp48
-rw-r--r--src/mongo/db/pipeline/document_source_match.cpp11
-rw-r--r--src/mongo/db/pipeline/document_source_project_test.cpp50
-rw-r--r--src/mongo/db/pipeline/document_source_redact.cpp2
-rw-r--r--src/mongo/db/pipeline/document_source_replace_root.cpp15
-rw-r--r--src/mongo/db/pipeline/document_source_replace_root_test.cpp8
-rw-r--r--src/mongo/db/pipeline/document_source_single_document_transformation.cpp7
-rw-r--r--src/mongo/db/pipeline/document_source_skip.cpp2
-rw-r--r--src/mongo/db/pipeline/document_source_sort.cpp8
-rw-r--r--src/mongo/db/pipeline/document_source_sort_test.cpp7
-rw-r--r--src/mongo/db/pipeline/document_source_unwind.cpp41
-rw-r--r--src/mongo/db/pipeline/document_source_unwind_test.cpp25
-rw-r--r--src/mongo/db/pipeline/parsed_add_fields.h5
-rw-r--r--src/mongo/db/pipeline/parsed_exclusion_projection.cpp10
-rw-r--r--src/mongo/db/pipeline/parsed_exclusion_projection.h9
-rw-r--r--src/mongo/db/pipeline/parsed_exclusion_projection_test.cpp23
-rw-r--r--src/mongo/db/pipeline/parsed_inclusion_projection.cpp12
-rw-r--r--src/mongo/db/pipeline/parsed_inclusion_projection.h11
-rw-r--r--src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp42
-rw-r--r--src/mongo/db/pipeline/pipeline_test.cpp140
30 files changed, 853 insertions, 196 deletions
diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h
index c8747acac33..c4316213827 100644
--- a/src/mongo/db/matcher/expression_algo.h
+++ b/src/mongo/db/matcher/expression_algo.h
@@ -28,6 +28,12 @@
* it in the license file.
*/
+#pragma once
+
+#include <memory>
+#include <set>
+
+#include "mongo/base/string_data.h"
#include "mongo/stdx/functional.h"
namespace mongo {
diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp
index e8fe53ddc93..c97d766679a 100644
--- a/src/mongo/db/matcher/expression_algo_test.cpp
+++ b/src/mongo/db/matcher/expression_algo_test.cpp
@@ -906,6 +906,28 @@ TEST(SplitMatchExpression, ComplexMatchExpressionSplitsCorrectly) {
"{$eq: 1}}]}]}, {y: {$eq: 3}}]}]}]}"));
}
+TEST(SplitMatchExpression, ShouldNotExtractPrefixOfDottedPathAsIndependent) {
+ BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {'a.b': 1}, {'a.c': 1}]}");
+ const CollatorInterface* collator = nullptr;
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator);
+ ASSERT_OK(status.getStatus());
+
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"a.b"});
+
+ ASSERT_TRUE(splitExpr.first.get());
+ BSONObjBuilder firstBob;
+ splitExpr.first->serialize(&firstBob);
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder secondBob;
+ splitExpr.second->serialize(&secondBob);
+
+ ASSERT_BSONOBJ_EQ(firstBob.obj(), fromjson("{'a.c': {$eq: 1}}"));
+ ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{$and: [{a: {$eq: 1}}, {'a.b': {$eq: 1}}]}"));
+}
+
TEST(MapOverMatchExpression, DoesMapOverLogicalNodes) {
BSONObj matchPredicate = fromjson("{a: {$not: {$eq: 1}}}");
const CollatorInterface* collator = nullptr;
diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp
index ae456bf035d..a78c307fad3 100644
--- a/src/mongo/db/pipeline/document_source.cpp
+++ b/src/mongo/db/pipeline/document_source.cpp
@@ -28,6 +28,7 @@
#include "mongo/platform/basic.h"
+#include "mongo/db/matcher/expression_algo.h"
#include "mongo/db/pipeline/document_source.h"
#include "mongo/db/pipeline/expression_context.h"
#include "mongo/db/pipeline/value.h"
@@ -88,6 +89,107 @@ intrusive_ptr<DocumentSource> DocumentSource::optimize() {
return this;
}
+namespace {
+
+/**
+ * Given a set of paths 'dependencies', determines which of those paths will be modified if all
+ * paths except those in 'preservedPaths' are modified.
+ *
+ * For example, extractModifiedDependencies({'a', 'b', 'c.d', 'e'}, {'a', 'b.c', c'}) returns
+ * {'b', 'e'}, since 'b' and 'e' are not preserved (only 'b.c' is preserved).
+ */
+std::set<std::string> extractModifiedDependencies(const std::set<std::string>& dependencies,
+ const std::set<std::string>& preservedPaths) {
+ std::set<std::string> modifiedDependencies;
+
+ // The modified dependencies is *almost* the set difference 'dependencies' - 'preservedPaths',
+ // except that if p in 'preservedPaths' is a "path prefix" of d in 'dependencies', then 'd'
+ // should not be included in the modified dependencies.
+ for (auto&& dependency : dependencies) {
+ bool preserved = false;
+ auto depAsPath = FieldPath(dependency);
+ auto firstField = depAsPath.getFieldName(0);
+ // If even a prefix is preserved, the path is preserved, so search for any prefixes of
+ // 'dependency' as well. 'preservedPaths' is an *ordered* set, so we only have to search the
+ // range ['firstField', 'dependency'] to find any prefixes of 'dependency'.
+ for (auto it = preservedPaths.lower_bound(firstField);
+ it != preservedPaths.upper_bound(dependency);
+ ++it) {
+ if (*it == dependency || expression::isPathPrefixOf(*it, dependency)) {
+ preserved = true;
+ break;
+ }
+ }
+ if (!preserved) {
+ modifiedDependencies.insert(dependency);
+ }
+ }
+ return modifiedDependencies;
+}
+
+/**
+ * Returns a pair of pointers to $match stages, either of which can be null. The first entry in the
+ * pair is a $match stage that can be moved before this stage, the second is a $match stage that
+ * must remain after this stage.
+ */
+std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>>
+splitMatchByModifiedFields(const boost::intrusive_ptr<DocumentSourceMatch>& match,
+ const DocumentSource::GetModPathsReturn& modifiedPathsRet) {
+ // Attempt to move some or all of this $match before this stage.
+ std::set<std::string> modifiedPaths;
+ switch (modifiedPathsRet.type) {
+ case DocumentSource::GetModPathsReturn::Type::kNotSupported:
+ // We don't know what paths this stage might modify, so refrain from swapping.
+ return {nullptr, match};
+ case DocumentSource::GetModPathsReturn::Type::kAllPaths:
+ // This stage modifies all paths, so cannot be swapped with a $match at all.
+ return {nullptr, match};
+ case DocumentSource::GetModPathsReturn::Type::kFiniteSet:
+ modifiedPaths = std::move(modifiedPathsRet.paths);
+ break;
+ case DocumentSource::GetModPathsReturn::Type::kAllExcept: {
+ DepsTracker depsTracker;
+ match->getDependencies(&depsTracker);
+ modifiedPaths = extractModifiedDependencies(depsTracker.fields, modifiedPathsRet.paths);
+ }
+ }
+ return match->splitSourceBy(modifiedPaths);
+}
+
+} // namespace
+
+Pipeline::SourceContainer::iterator DocumentSource::optimizeAt(
+ Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
+ invariant(*itr == this && (std::next(itr) != container->end()));
+ auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get());
+ if (canSwapWithMatch() && nextMatch && !nextMatch->isTextQuery()) {
+ // We're allowed to swap with a $match and the stage after us is a $match. Furthermore, the
+ // $match does not contain a text search predicate, which we do not attempt to optimize
+ // because such a $match must already be the first stage in the pipeline. We can attempt to
+ // swap the $match or part of the $match before ourselves.
+ auto splitMatch = splitMatchByModifiedFields(nextMatch, getModifiedPaths());
+ invariant(splitMatch.first || splitMatch.second);
+
+ if (splitMatch.first) {
+ // At least part of the $match can be moved before this stage. Erase the original $match
+ // and put the independent part before this stage. If splitMatch.second is not null,
+ // then there is a new $match stage to insert after ourselves which is dependent on the
+ // modified fields.
+ container->erase(std::next(itr));
+ container->insert(itr, std::move(splitMatch.first));
+ if (splitMatch.second) {
+ container->insert(std::next(itr), std::move(splitMatch.second));
+ }
+
+ // The stage before the new $match may be able to optimize further, if there is such a
+ // stage.
+ return std::prev(itr) == container->begin() ? std::prev(itr)
+ : std::prev(std::prev(itr));
+ }
+ }
+ return doOptimizeAt(itr, container);
+}
+
void DocumentSource::dispose() {
if (pSource) {
pSource->dispose();
diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h
index 2d780df649c..d5d3c02f07b 100644
--- a/src/mongo/db/pipeline/document_source.h
+++ b/src/mongo/db/pipeline/document_source.h
@@ -255,57 +255,6 @@ public:
virtual void setSource(DocumentSource* pSource);
/**
- * Gets a BSONObjSet representing the sort order(s) of the output of the stage.
- */
- virtual BSONObjSet getOutputSorts() {
- return SimpleBSONObjComparator::kInstance.makeBSONObjSet();
- }
-
- /**
- * Returns an optimized DocumentSource that is semantically equivalent to this one, or
- * nullptr if this stage is a no-op. Implementations are allowed to modify themselves
- * in-place and return a pointer to themselves. For best results, first optimize the pipeline
- * with the optimizePipeline() method defined in pipeline.cpp.
- *
- * This is intended for any operations that include expressions, and provides a hook for
- * those to optimize those operations.
- *
- * The default implementation is to do nothing and return yourself.
- */
- virtual boost::intrusive_ptr<DocumentSource> optimize();
-
- /**
- * Attempt to perform an optimization with the following source in the pipeline. 'container'
- * refers to the entire pipeline, and 'itr' points to this stage within the pipeline.
- *
- * The return value is an iterator over the same container which points to the first location
- * in the container at which an optimization may be possible.
- *
- * For example, if a swap takes place, the returned iterator should just be the position
- * directly preceding 'itr', if such a position exists, since the stage at that position may be
- * able to perform further optimizations with its new neighbor.
- */
- virtual Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
- Pipeline::SourceContainer* container) {
- return std::next(itr);
- };
-
- enum GetDepsReturn {
- NOT_SUPPORTED = 0x0, // The full object and all metadata may be required
- SEE_NEXT = 0x1, // Later stages could need either fields or metadata
- EXHAUSTIVE_FIELDS = 0x2, // Later stages won't need more fields from input
- EXHAUSTIVE_META = 0x4, // Later stages won't need more metadata from input
- EXHAUSTIVE_ALL = EXHAUSTIVE_FIELDS | EXHAUSTIVE_META, // Later stages won't need either
- };
-
- /**
- * Get the dependencies this operation needs to do its job.
- */
- virtual GetDepsReturn getDependencies(DepsTracker* deps) const {
- return NOT_SUPPORTED;
- }
-
- /**
* In the default case, serializes the DocumentSource and adds it to the std::vector<Value>.
*
* A subclass may choose to overwrite this, rather than serialize,
@@ -378,6 +327,134 @@ public:
*/
static BSONObjSet truncateSortSet(const BSONObjSet& sorts, const std::set<std::string>& fields);
+ //
+ // Optimization API - These methods give each DocumentSource an opportunity to apply any local
+ // optimizations, and to provide any rule-based optimizations to swap with or absorb subsequent
+ // stages.
+ //
+
+ /**
+ * The non-virtual public interface for optimization. Attempts to do some generic optimizations
+ * such as pushing $matches as early in the pipeline as possible, then calls out to
+ * doOptimizeAt() for stage-specific optimizations.
+ *
+ * Subclasses should override doOptimizeAt() if they can apply some optimization(s) based on
+ * subsequent stages in the pipeline.
+ */
+ Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container);
+
+ /**
+ * Returns an optimized DocumentSource that is semantically equivalent to this one, or
+ * nullptr if this stage is a no-op. Implementations are allowed to modify themselves
+ * in-place and return a pointer to themselves. For best results, first optimize the pipeline
+ * with the optimizePipeline() method defined in pipeline.cpp.
+ *
+ * This is intended for any operations that include expressions, and provides a hook for
+ * those to optimize those operations.
+ *
+ * The default implementation is to do nothing and return yourself.
+ */
+ virtual boost::intrusive_ptr<DocumentSource> optimize();
+
+ //
+ // Property Analysis - These methods allow a DocumentSource to expose information about
+ // properties of themselves, such as which fields they need to apply their transformations, and
+ // whether or not they produce or preserve a sort order.
+ //
+ // Property analysis can be useful during optimization (e.g. analysis of sort orders determines
+ // whether or not a blocking group can be upgraded to a streaming group).
+ //
+
+ /**
+ * Gets a BSONObjSet representing the sort order(s) of the output of the stage.
+ */
+ virtual BSONObjSet getOutputSorts() {
+ return SimpleBSONObjComparator::kInstance.makeBSONObjSet();
+ }
+
+ struct GetModPathsReturn {
+ enum class Type {
+ // No information is available about which paths are modified.
+ kNotSupported,
+
+ // All fields will be modified. This should be used by stages like $replaceRoot which
+ // modify the entire document.
+ kAllPaths,
+
+ // A finite set of paths will be modified by this stage. This is true for something like
+ // {$project: {a: 0, b: 0}}, which will only modify 'a' and 'b', and leave all other
+ // paths unmodified.
+ kFiniteSet,
+
+ // This stage will modify an infinite set of paths, but we know which paths it will not
+ // modify. For example, the stage {$project: {_id: 1, a: 1}} will leave only the fields
+ // '_id' and 'a' unmodified, but all other fields will be projected out.
+ kAllExcept,
+ };
+
+ GetModPathsReturn(Type type, std::set<std::string>&& paths)
+ : type(type), paths(std::move(paths)) {}
+
+ Type type;
+ std::set<std::string> paths;
+ };
+
+ /**
+ * Returns information about which paths are added, removed, or updated by this stage. The
+ * default implementation uses kNotSupported to indicate that the set of modified paths for this
+ * stage is not known.
+ *
+ * See GetModPathsReturn above for the possible return values and what they mean.
+ */
+ virtual GetModPathsReturn getModifiedPaths() const {
+ return {GetModPathsReturn::Type::kNotSupported, std::set<std::string>{}};
+ }
+
+ /**
+ * Returns whether this stage can swap with a subsequent $match stage, provided that the match
+ * does not depend on the paths returned by getModifiedPaths().
+ *
+ * Subclasses which want to participate in match swapping should override this to return true.
+ * Such a subclass must also override getModifiedPaths() to provide information about which
+ * $match predicates be swapped before itself.
+ */
+ virtual bool canSwapWithMatch() const {
+ return false;
+ }
+
+ enum GetDepsReturn {
+ // The full object and all metadata may be required.
+ NOT_SUPPORTED = 0x0,
+
+ // Later stages could need either fields or metadata. For example, a $limit stage will pass
+ // through all fields, and they may or may not be needed by future stages.
+ SEE_NEXT = 0x1,
+
+ // Later stages won't need more fields from input. For example, an inclusion projection like
+ // {_id: 1, a: 1} will only output two fields, so future stages cannot possibly depend on
+ // any other fields.
+ EXHAUSTIVE_FIELDS = 0x2,
+
+ // Later stages won't need more metadata from input. For example, a $group stage will group
+ // documents together, discarding their text score.
+ EXHAUSTIVE_META = 0x4,
+
+ // Later stages won't need either fields or metadata.
+ EXHAUSTIVE_ALL = EXHAUSTIVE_FIELDS | EXHAUSTIVE_META,
+ };
+
+ /**
+ * Get the dependencies this operation needs to do its job. If overridden, subclasses must add
+ * all paths needed to apply their transformation to 'deps->fields', and call
+ * 'deps->setNeedTextScore()' if the text score is required.
+ *
+ * See GetDepsReturn above for the possible return values and what they mean.
+ */
+ virtual GetDepsReturn getDependencies(DepsTracker* deps) const {
+ return NOT_SUPPORTED;
+ }
+
protected:
/**
Base constructor.
@@ -393,6 +470,24 @@ protected:
*/
virtual void doInjectExpressionContext() {}
+ /**
+ * Attempt to perform an optimization with the following source in the pipeline. 'container'
+ * refers to the entire pipeline, and 'itr' points to this stage within the pipeline. The caller
+ * must guarantee that std::next(itr) != container->end().
+ *
+ * The return value is an iterator over the same container which points to the first location
+ * in the container at which an optimization may be possible.
+ *
+ * For example, if a swap takes place, the returned iterator should just be the position
+ * directly preceding 'itr', if such a position exists, since the stage at that position may be
+ * able to perform further optimizations with its new neighbor.
+ */
+ virtual Pipeline::SourceContainer::iterator doOptimizeAt(
+ Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
+ return std::next(itr);
+ };
+
+
/*
Most DocumentSources have an underlying source they get their data
from. This is a convenience for them.
@@ -577,8 +672,8 @@ public:
/**
* Attempts to combine with any subsequent $limit stages by setting the internal '_limit' field.
*/
- Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
- Pipeline::SourceContainer* container) final;
+ Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) final;
Value serialize(bool explain = false) const final;
bool isValidInitialSource() const final {
return true;
@@ -879,12 +974,13 @@ public:
return pSource ? pSource->getOutputSorts()
: SimpleBSONObjComparator::kInstance.makeBSONObjSet();
}
+
/**
* Attempts to combine with any subsequent $match stages, joining the query objects with a
* $and.
*/
- Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
- Pipeline::SourceContainer* container) final;
+ Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) final;
void setSource(DocumentSource* Source) final;
GetDepsReturn getDependencies(DepsTracker* deps) const final;
@@ -947,7 +1043,7 @@ public:
* For example, {$match: {a: "foo", "b.c": 4}} split by "b" will return pointers to two stages:
* {$match: {a: "foo"}}, and {$match: {"b.c": 4}}.
*/
- std::pair<boost::intrusive_ptr<DocumentSource>, boost::intrusive_ptr<DocumentSource>>
+ std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>>
splitSourceBy(const std::set<std::string>& fields);
/**
@@ -1143,6 +1239,7 @@ public:
virtual DocumentSource::GetDepsReturn addDependencies(DepsTracker* deps) const = 0;
virtual void injectExpressionContext(
const boost::intrusive_ptr<ExpressionContext>& pExpCtx) = 0;
+ virtual GetModPathsReturn getModifiedPaths() const = 0;
};
DocumentSourceSingleDocumentTransformation(
@@ -1156,10 +1253,15 @@ public:
boost::intrusive_ptr<DocumentSource> optimize() final;
void dispose() final;
Value serialize(bool explain) const final;
- Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
- Pipeline::SourceContainer* container) final;
+ Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) final;
void doInjectExpressionContext() final;
DocumentSource::GetDepsReturn getDependencies(DepsTracker* deps) const final;
+ GetModPathsReturn getModifiedPaths() const final;
+
+ bool canSwapWithMatch() const final {
+ return true;
+ }
private:
// Stores transformation logic.
@@ -1250,8 +1352,8 @@ public:
* Attempts to duplicate the redact-safe portion of a subsequent $match before the $redact
* stage.
*/
- Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
- Pipeline::SourceContainer* container) final;
+ Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) final;
void doInjectExpressionContext() final;
@@ -1387,8 +1489,8 @@ public:
/**
* Attempts to combine with a subsequent $limit stage, setting 'limit' appropriately.
*/
- Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
- Pipeline::SourceContainer* container) final;
+ Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) final;
Value serialize(bool explain = false) const final;
GetDepsReturn getDependencies(DepsTracker* deps) const final {
@@ -1450,17 +1552,24 @@ public:
const char* getSourceName() const final;
void serializeToArray(std::vector<Value>& array, bool explain = false) const final;
+ GetModPathsReturn getModifiedPaths() const final {
+ // A $sort does not modify any paths.
+ return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}};
+ }
+
+ bool canSwapWithMatch() const final {
+ return true;
+ }
+
BSONObjSet getOutputSorts() final {
return allPrefixes(_sort);
}
/**
- * Attempts to move a subsequent $match stage before the $sort, reducing the number of
- * documents that pass through the stage. Also attempts to absorb a subsequent $limit stage so
- * that it an perform a top-k sort.
+ * Attempts to absorb a subsequent $limit stage so that it an perform a top-k sort.
*/
- Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
- Pipeline::SourceContainer* container) final;
+ Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) final;
void dispose() final;
GetDepsReturn getDependencies(DepsTracker* deps) const final;
@@ -1601,8 +1710,8 @@ public:
* Attempts to move a subsequent $limit before the skip, potentially allowing for forther
* optimizations earlier in the pipeline.
*/
- Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
- Pipeline::SourceContainer* container) final;
+ Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) final;
Value serialize(bool explain = false) const final;
boost::intrusive_ptr<DocumentSource> optimize() final;
BSONObjSet getOutputSorts() final {
@@ -1661,14 +1770,16 @@ public:
Value serialize(bool explain = false) const final;
BSONObjSet getOutputSorts() final;
- GetDepsReturn getDependencies(DepsTracker* deps) const final;
-
/**
- * If the next stage is a $match, the part of the match that is not dependent on the unwound
- * field can be moved into a new, preceding, $match stage.
+ * Returns the unwound path, and the 'includeArrayIndex' path, if specified.
*/
- Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
- Pipeline::SourceContainer* container) final;
+ GetModPathsReturn getModifiedPaths() const final;
+
+ bool canSwapWithMatch() const final {
+ return true;
+ }
+
+ GetDepsReturn getDependencies(DepsTracker* deps) const final;
/**
* Creates a new $unwind DocumentSource from a BSON specification.
@@ -1725,8 +1836,8 @@ public:
* Attempts to combine with a subsequent limit stage, setting the internal limit field
* as a result.
*/
- Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
- Pipeline::SourceContainer* container) final;
+ Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) final;
bool isValidInitialSource() const final {
return true;
}
@@ -1797,11 +1908,20 @@ public:
void serializeToArray(std::vector<Value>& array, bool explain = false) const final;
/**
+ * Returns the 'as' path, and possibly fields modified by an absorbed $unwind.
+ */
+ GetModPathsReturn getModifiedPaths() const final;
+
+ bool canSwapWithMatch() const final {
+ return true;
+ }
+
+ /**
* Attempts to combine with a subsequent $unwind stage, setting the internal '_unwindSrc'
* field.
*/
- Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
- Pipeline::SourceContainer* container) final;
+ Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) final;
GetDepsReturn getDependencies(DepsTracker* deps) const final;
void dispose() final;
@@ -1906,10 +2026,19 @@ public:
void serializeToArray(std::vector<Value>& array, bool explain = false) const final;
/**
+ * Returns the 'as' path, and possibly the fields modified by an absorbed $unwind.
+ */
+ GetModPathsReturn getModifiedPaths() const final;
+
+ bool canSwapWithMatch() const final {
+ return true;
+ }
+
+ /**
* Attempts to combine with a subsequent $unwind stage, setting the internal '_unwind' field.
*/
- Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
- Pipeline::SourceContainer* container) final;
+ Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) final;
GetDepsReturn getDependencies(DepsTracker* deps) const final {
_startWith->addDependencies(deps);
diff --git a/src/mongo/db/pipeline/document_source_cursor.cpp b/src/mongo/db/pipeline/document_source_cursor.cpp
index 132d6546235..e22e72c1ede 100644
--- a/src/mongo/db/pipeline/document_source_cursor.cpp
+++ b/src/mongo/db/pipeline/document_source_cursor.cpp
@@ -147,7 +147,7 @@ long long DocumentSourceCursor::getLimit() const {
return _limit ? _limit->getLimit() : -1;
}
-Pipeline::SourceContainer::iterator DocumentSourceCursor::optimizeAt(
+Pipeline::SourceContainer::iterator DocumentSourceCursor::doOptimizeAt(
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
invariant(*itr == this);
diff --git a/src/mongo/db/pipeline/document_source_geo_near.cpp b/src/mongo/db/pipeline/document_source_geo_near.cpp
index 62914105716..1dd78b75a09 100644
--- a/src/mongo/db/pipeline/document_source_geo_near.cpp
+++ b/src/mongo/db/pipeline/document_source_geo_near.cpp
@@ -71,7 +71,7 @@ DocumentSource::GetNextResult DocumentSourceGeoNear::getNext() {
return output.freeze();
}
-Pipeline::SourceContainer::iterator DocumentSourceGeoNear::optimizeAt(
+Pipeline::SourceContainer::iterator DocumentSourceGeoNear::doOptimizeAt(
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
invariant(*itr == this);
diff --git a/src/mongo/db/pipeline/document_source_graph_lookup.cpp b/src/mongo/db/pipeline/document_source_graph_lookup.cpp
index 2e8b5131d01..7b6d6c0b22a 100644
--- a/src/mongo/db/pipeline/document_source_graph_lookup.cpp
+++ b/src/mongo/db/pipeline/document_source_graph_lookup.cpp
@@ -374,7 +374,18 @@ void DocumentSourceGraphLookUp::performSearch() {
doBreadthFirstSearch();
}
-Pipeline::SourceContainer::iterator DocumentSourceGraphLookUp::optimizeAt(
+DocumentSource::GetModPathsReturn DocumentSourceGraphLookUp::getModifiedPaths() const {
+ std::set<std::string> modifiedPaths{_as.fullPath()};
+ if (_unwind) {
+ auto pathsModifiedByUnwind = _unwind.get()->getModifiedPaths();
+ invariant(pathsModifiedByUnwind.type == GetModPathsReturn::Type::kFiniteSet);
+ modifiedPaths.insert(pathsModifiedByUnwind.paths.begin(),
+ pathsModifiedByUnwind.paths.end());
+ }
+ return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths)};
+}
+
+Pipeline::SourceContainer::iterator DocumentSourceGraphLookUp::doOptimizeAt(
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
invariant(*itr == this);
diff --git a/src/mongo/db/pipeline/document_source_graph_lookup_test.cpp b/src/mongo/db/pipeline/document_source_graph_lookup_test.cpp
index 6f74a0d6280..ac25e48936e 100644
--- a/src/mongo/db/pipeline/document_source_graph_lookup_test.cpp
+++ b/src/mongo/db/pipeline/document_source_graph_lookup_test.cpp
@@ -389,5 +389,52 @@ TEST_F(DocumentSourceGraphLookUpTest, ShouldPropagatePausesWhileUnwinding) {
ASSERT_TRUE(graphLookupStage->getNext().isEOF());
}
+TEST_F(DocumentSourceGraphLookUpTest, GraphLookupShouldReportAsFieldIsModified) {
+ auto expCtx = getExpCtx();
+ NamespaceString fromNs("test", "foreign");
+ expCtx->resolvedNamespaces[fromNs.coll()] = {fromNs, std::vector<BSONObj>{}};
+ auto graphLookupStage =
+ DocumentSourceGraphLookUp::create(expCtx,
+ fromNs,
+ "results",
+ "from",
+ "to",
+ ExpressionFieldPath::create("startPoint"),
+ boost::none,
+ boost::none,
+ boost::none,
+ boost::none);
+
+ auto modifiedPaths = graphLookupStage->getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet);
+ ASSERT_EQ(1U, modifiedPaths.paths.size());
+ ASSERT_EQ(1U, modifiedPaths.paths.count("results"));
+}
+
+TEST_F(DocumentSourceGraphLookUpTest, GraphLookupShouldReportFieldsModifiedByAbsorbedUnwind) {
+ auto expCtx = getExpCtx();
+ NamespaceString fromNs("test", "foreign");
+ expCtx->resolvedNamespaces[fromNs.coll()] = {fromNs, std::vector<BSONObj>{}};
+ auto unwindStage =
+ DocumentSourceUnwind::create(expCtx, "results", false, std::string("arrIndex"));
+ auto graphLookupStage =
+ DocumentSourceGraphLookUp::create(expCtx,
+ fromNs,
+ "results",
+ "from",
+ "to",
+ ExpressionFieldPath::create("startPoint"),
+ boost::none,
+ boost::none,
+ boost::none,
+ unwindStage);
+
+ auto modifiedPaths = graphLookupStage->getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet);
+ ASSERT_EQ(2U, modifiedPaths.paths.size());
+ ASSERT_EQ(1U, modifiedPaths.paths.count("results"));
+ ASSERT_EQ(1U, modifiedPaths.paths.count("arrIndex"));
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_limit.cpp b/src/mongo/db/pipeline/document_source_limit.cpp
index cdc3ed2d0a6..75ca17eca48 100644
--- a/src/mongo/db/pipeline/document_source_limit.cpp
+++ b/src/mongo/db/pipeline/document_source_limit.cpp
@@ -53,7 +53,7 @@ const char* DocumentSourceLimit::getSourceName() const {
return "$limit";
}
-Pipeline::SourceContainer::iterator DocumentSourceLimit::optimizeAt(
+Pipeline::SourceContainer::iterator DocumentSourceLimit::doOptimizeAt(
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
invariant(*itr == this);
diff --git a/src/mongo/db/pipeline/document_source_lookup.cpp b/src/mongo/db/pipeline/document_source_lookup.cpp
index 4607d9a28e8..7a0727bcf45 100644
--- a/src/mongo/db/pipeline/document_source_lookup.cpp
+++ b/src/mongo/db/pipeline/document_source_lookup.cpp
@@ -159,7 +159,18 @@ DocumentSource::GetNextResult DocumentSourceLookUp::getNext() {
return output.freeze();
}
-Pipeline::SourceContainer::iterator DocumentSourceLookUp::optimizeAt(
+DocumentSource::GetModPathsReturn DocumentSourceLookUp::getModifiedPaths() const {
+ std::set<std::string> modifiedPaths{_as.fullPath()};
+ if (_unwindSrc) {
+ auto pathsModifiedByUnwind = _unwindSrc->getModifiedPaths();
+ invariant(pathsModifiedByUnwind.type == GetModPathsReturn::Type::kFiniteSet);
+ modifiedPaths.insert(pathsModifiedByUnwind.paths.begin(),
+ pathsModifiedByUnwind.paths.end());
+ }
+ return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths)};
+}
+
+Pipeline::SourceContainer::iterator DocumentSourceLookUp::doOptimizeAt(
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
invariant(*itr == this);
@@ -174,52 +185,13 @@ Pipeline::SourceContainer::iterator DocumentSourceLookUp::optimizeAt(
return itr;
}
+ // Attempt to internalize any predicates of a $match upon the "_as" field.
auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get());
if (!nextMatch) {
return std::next(itr);
}
- // Attempt to move part of the $match before ourselves, and internalize any predicates upon the
- // "_as" field.
- std::string outputPath = _as.fullPath();
-
- std::set<std::string> fields = {outputPath};
- if (_handlingUnwind && _unwindSrc->indexPath()) {
- fields.insert((*_unwindSrc->indexPath()).fullPath());
- }
-
- // Attempt to split the $match, putting the independent portion before ourselves.
- auto splitMatch = nextMatch->splitSourceBy(fields);
-
- // Remove the original match from the pipeline.
- container->erase(std::next(itr));
-
- auto independent = dynamic_cast<DocumentSourceMatch*>(splitMatch.first.get());
- auto dependent = dynamic_cast<DocumentSourceMatch*>(splitMatch.second.get());
-
- invariant(independent || dependent);
-
- auto locationOfNextPossibleOptimization = std::next(itr);
- if (independent) {
- // If the $match has an independent portion, insert it before ourselves. Keep track of where
- // the pipeline should check for the next possible optimization.
- container->insert(itr, std::move(independent));
- if (std::prev(itr) == container->begin()) {
- locationOfNextPossibleOptimization = std::prev(itr);
- } else {
- locationOfNextPossibleOptimization = std::prev(std::prev(itr));
- }
- }
-
- if (!dependent) {
- // Nothing left to do; the entire $match was moved before us.
- return locationOfNextPossibleOptimization;
- }
-
- // Part of the $match was dependent upon us; we must now determine if we need to split the
- // $match again to obtain a $match that is a predicate only upon the "_as" path.
-
if (!_handlingUnwind || _unwindSrc->indexPath() || _unwindSrc->preserveNullAndEmptyArrays()) {
// We must be unwinding our result to internalize a $match. For example, consider the
// following pipeline:
@@ -248,10 +220,18 @@ Pipeline::SourceContainer::iterator DocumentSourceLookUp::optimizeAt(
// In addition, we must avoid internalizing a $match if an absorbed $unwind has an
// "includeArrayIndex" option, since the $match will alter the indices of the returned
// values.
- container->insert(std::next(itr), std::move(splitMatch.second));
- return locationOfNextPossibleOptimization;
+ return std::next(itr);
}
+ auto outputPath = _as.fullPath();
+
+ // Since $match splitting is handled in a generic way, we expect to have already swapped
+ // portions of the $match that do not depend on the 'as' path or on an internalized $unwind's
+ // index path before ourselves. But due to the early return above, we know there is no
+ // internalized $unwind with an index path.
+ //
+ // Therefore, 'nextMatch' should only depend on the 'as' path. We now try to absorb the match on
+ // the 'as' path in order to push down these predicates into the foreign collection.
bool isMatchOnlyOnAs = true;
auto computeWhetherMatchOnAs = [&isMatchOnlyOnAs, &outputPath](MatchExpression* expression,
std::string path) -> void {
@@ -271,24 +251,26 @@ Pipeline::SourceContainer::iterator DocumentSourceLookUp::optimizeAt(
}
};
- expression::mapOver(dependent->getMatchExpression(), computeWhetherMatchOnAs);
+ expression::mapOver(nextMatch->getMatchExpression(), computeWhetherMatchOnAs);
if (!isMatchOnlyOnAs) {
- // "dependent" is not wholly a predicate upon our "_as" field. We must put it back into the
- // pipeline as-is.
- container->insert(std::next(itr), std::move(splitMatch.second));
- return locationOfNextPossibleOptimization;
+ // "nextMatch" does not contain any predicates that can be absorbed into this stage.
+ return std::next(itr);
}
- // We can internalize the entire $match.
+ // We can internalize the $match.
if (!_handlingMatch) {
- _matchSrc = dependent;
+ _matchSrc = nextMatch;
_handlingMatch = true;
} else {
// We have already absorbed a $match. We need to join it with 'dependent'.
- _matchSrc->joinMatchWith(dependent);
+ _matchSrc->joinMatchWith(nextMatch);
}
- return locationOfNextPossibleOptimization;
+
+ // Remove the original $match. There may be further optimization between this $lookup and the
+ // new neighbor, so we return an iterator pointing to ourself.
+ container->erase(std::next(itr));
+ return itr;
}
void DocumentSourceLookUp::dispose() {
diff --git a/src/mongo/db/pipeline/document_source_lookup_test.cpp b/src/mongo/db/pipeline/document_source_lookup_test.cpp
index c42eebdb2f1..229b50906e9 100644
--- a/src/mongo/db/pipeline/document_source_lookup_test.cpp
+++ b/src/mongo/db/pipeline/document_source_lookup_test.cpp
@@ -269,5 +269,53 @@ TEST_F(DocumentSourceLookUpTest, ShouldPropagatePausesWhileUnwinding) {
ASSERT_TRUE(lookup->getNext().isEOF());
}
+TEST_F(DocumentSourceLookUpTest, LookupReportsAsFieldIsModified) {
+ auto expCtx = getExpCtx();
+ NamespaceString fromNs("test", "foreign");
+ expCtx->resolvedNamespaces[fromNs.coll()] = {fromNs, std::vector<BSONObj>{}};
+
+ // Set up the $lookup stage.
+ auto lookupSpec = Document{{"$lookup",
+ Document{{"from", fromNs.coll()},
+ {"localField", "foreignId"},
+ {"foreignField", "_id"},
+ {"as", "foreignDocs"}}}}
+ .toBson();
+ auto parsed = DocumentSourceLookUp::createFromBson(lookupSpec.firstElement(), expCtx);
+ auto lookup = static_cast<DocumentSourceLookUp*>(parsed.get());
+
+ auto modifiedPaths = lookup->getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet);
+ ASSERT_EQ(1U, modifiedPaths.paths.size());
+ ASSERT_EQ(1U, modifiedPaths.paths.count("foreignDocs"));
+}
+
+TEST_F(DocumentSourceLookUpTest, LookupReportsFieldsModifiedByAbsorbedUnwind) {
+ auto expCtx = getExpCtx();
+ NamespaceString fromNs("test", "foreign");
+ expCtx->resolvedNamespaces[fromNs.coll()] = {fromNs, std::vector<BSONObj>{}};
+
+ // Set up the $lookup stage.
+ auto lookupSpec = Document{{"$lookup",
+ Document{{"from", fromNs.coll()},
+ {"localField", "foreignId"},
+ {"foreignField", "_id"},
+ {"as", "foreignDoc"}}}}
+ .toBson();
+ auto parsed = DocumentSourceLookUp::createFromBson(lookupSpec.firstElement(), expCtx);
+ auto lookup = static_cast<DocumentSourceLookUp*>(parsed.get());
+
+ const bool preserveNullAndEmptyArrays = false;
+ const boost::optional<std::string> includeArrayIndex = std::string("arrIndex");
+ lookup->setUnwindStage(DocumentSourceUnwind::create(
+ expCtx, "foreignDoc", preserveNullAndEmptyArrays, includeArrayIndex));
+
+ auto modifiedPaths = lookup->getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet);
+ ASSERT_EQ(2U, modifiedPaths.paths.size());
+ ASSERT_EQ(1U, modifiedPaths.paths.count("foreignDoc"));
+ ASSERT_EQ(1U, modifiedPaths.paths.count("arrIndex"));
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp
index 8476c82a67d..d8df9116145 100644
--- a/src/mongo/db/pipeline/document_source_match.cpp
+++ b/src/mongo/db/pipeline/document_source_match.cpp
@@ -87,7 +87,7 @@ DocumentSource::GetNextResult DocumentSourceMatch::getNext() {
return nextInput;
}
-Pipeline::SourceContainer::iterator DocumentSourceMatch::optimizeAt(
+Pipeline::SourceContainer::iterator DocumentSourceMatch::doOptimizeAt(
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
invariant(*itr == this);
@@ -364,7 +364,7 @@ void DocumentSourceMatch::joinMatchWith(intrusive_ptr<DocumentSourceMatch> other
_expression = std::move(status.getValue());
}
-pair<intrusive_ptr<DocumentSource>, intrusive_ptr<DocumentSource>>
+pair<intrusive_ptr<DocumentSourceMatch>, intrusive_ptr<DocumentSourceMatch>>
DocumentSourceMatch::splitSourceBy(const std::set<std::string>& fields) {
pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> newExpr(
expression::splitMatchExpressionBy(std::move(_expression), fields));
@@ -390,15 +390,12 @@ DocumentSourceMatch::splitSourceBy(const std::set<std::string>& fields) {
BSONObjBuilder firstBob;
newExpr.first->serialize(&firstBob);
- intrusive_ptr<DocumentSource> firstMatch(new DocumentSourceMatch(firstBob.obj(), pExpCtx));
-
// This $match stage is still needed, so update the MatchExpression as needed.
BSONObjBuilder secondBob;
newExpr.second->serialize(&secondBob);
- intrusive_ptr<DocumentSource> secondMatch(new DocumentSourceMatch(secondBob.obj(), pExpCtx));
-
- return {firstMatch, secondMatch};
+ return {DocumentSourceMatch::create(firstBob.obj(), pExpCtx),
+ DocumentSourceMatch::create(secondBob.obj(), pExpCtx)};
}
boost::intrusive_ptr<DocumentSourceMatch> DocumentSourceMatch::descendMatchOnPath(
diff --git a/src/mongo/db/pipeline/document_source_project_test.cpp b/src/mongo/db/pipeline/document_source_project_test.cpp
index de2bd12e254..6dca42a98f3 100644
--- a/src/mongo/db/pipeline/document_source_project_test.cpp
+++ b/src/mongo/db/pipeline/document_source_project_test.cpp
@@ -191,5 +191,55 @@ TEST_F(ProjectStageTest, ExclusionShouldNotAddDependencies) {
ASSERT_EQUALS(false, dependencies.getNeedTextScore());
}
+TEST_F(ProjectStageTest, InclusionProjectionReportsIncludedPathsFromGetModifiedPaths) {
+ auto project = DocumentSourceProject::create(
+ fromjson("{a: true, 'b.c': {d: true}, e: {f: {g: true}}, h: {i: {$literal: true}}}"),
+ getExpCtx());
+
+ auto modifiedPaths = project->getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kAllExcept);
+ ASSERT_EQUALS(4U, modifiedPaths.paths.size());
+ ASSERT_EQUALS(1U, modifiedPaths.paths.count("_id"));
+ ASSERT_EQUALS(1U, modifiedPaths.paths.count("a"));
+ ASSERT_EQUALS(1U, modifiedPaths.paths.count("b.c.d"));
+ ASSERT_EQUALS(1U, modifiedPaths.paths.count("e.f.g"));
+}
+
+TEST_F(ProjectStageTest, InclusionProjectionReportsIncludedPathsButExcludesId) {
+ auto project = DocumentSourceProject::create(
+ fromjson("{_id: false, 'b.c': {d: true}, e: {f: {g: true}}, h: {i: {$literal: true}}}"),
+ getExpCtx());
+
+ auto modifiedPaths = project->getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kAllExcept);
+ ASSERT_EQUALS(2U, modifiedPaths.paths.size());
+ ASSERT_EQUALS(1U, modifiedPaths.paths.count("b.c.d"));
+ ASSERT_EQUALS(1U, modifiedPaths.paths.count("e.f.g"));
+}
+
+TEST_F(ProjectStageTest, ExclusionProjectionReportsExcludedPathsAsModifiedPaths) {
+ auto project = DocumentSourceProject::create(
+ fromjson("{a: false, 'b.c': {d: false}, e: {f: {g: false}}}"), getExpCtx());
+
+ auto modifiedPaths = project->getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet);
+ ASSERT_EQUALS(3U, modifiedPaths.paths.size());
+ ASSERT_EQUALS(1U, modifiedPaths.paths.count("a"));
+ ASSERT_EQUALS(1U, modifiedPaths.paths.count("b.c.d"));
+ ASSERT_EQUALS(1U, modifiedPaths.paths.count("e.f.g"));
+}
+
+TEST_F(ProjectStageTest, ExclusionProjectionReportsExcludedPathsWithIdExclusion) {
+ auto project = DocumentSourceProject::create(
+ fromjson("{_id: false, 'b.c': {d: false}, e: {f: {g: false}}}"), getExpCtx());
+
+ auto modifiedPaths = project->getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet);
+ ASSERT_EQUALS(3U, modifiedPaths.paths.size());
+ ASSERT_EQUALS(1U, modifiedPaths.paths.count("_id"));
+ ASSERT_EQUALS(1U, modifiedPaths.paths.count("b.c.d"));
+ ASSERT_EQUALS(1U, modifiedPaths.paths.count("e.f.g"));
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_redact.cpp b/src/mongo/db/pipeline/document_source_redact.cpp
index f7570c8c733..e5d8932b0b2 100644
--- a/src/mongo/db/pipeline/document_source_redact.cpp
+++ b/src/mongo/db/pipeline/document_source_redact.cpp
@@ -72,7 +72,7 @@ DocumentSource::GetNextResult DocumentSourceRedact::getNext() {
return nextInput;
}
-Pipeline::SourceContainer::iterator DocumentSourceRedact::optimizeAt(
+Pipeline::SourceContainer::iterator DocumentSourceRedact::doOptimizeAt(
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
invariant(*itr == this);
diff --git a/src/mongo/db/pipeline/document_source_replace_root.cpp b/src/mongo/db/pipeline/document_source_replace_root.cpp
index 0c1c1a38e8c..5625e43b1ab 100644
--- a/src/mongo/db/pipeline/document_source_replace_root.cpp
+++ b/src/mongo/db/pipeline/document_source_replace_root.cpp
@@ -51,7 +51,7 @@ class ReplaceRootTransformation final
public:
ReplaceRootTransformation() {}
- Document applyTransformation(Document input) {
+ Document applyTransformation(Document input) final {
// Extract subdocument in the form of a Value.
_variables->setRoot(input);
Value newRoot = _newRoot->evaluate(_variables.get());
@@ -81,25 +81,30 @@ public:
}
// Optimize the newRoot expression.
- void optimize() {
+ void optimize() final {
_newRoot->optimize();
}
- Document serialize(bool explain) const {
+ Document serialize(bool explain) const final {
return Document{{"newRoot", _newRoot->serialize(explain)}};
}
- DocumentSource::GetDepsReturn addDependencies(DepsTracker* deps) const {
+ DocumentSource::GetDepsReturn addDependencies(DepsTracker* deps) const final {
_newRoot->addDependencies(deps);
// This stage will replace the entire document with a new document, so any existing fields
// will be replaced and cannot be required as dependencies.
return DocumentSource::EXHAUSTIVE_FIELDS;
}
- void injectExpressionContext(const boost::intrusive_ptr<ExpressionContext>& pExpCtx) {
+ void injectExpressionContext(const boost::intrusive_ptr<ExpressionContext>& pExpCtx) final {
_newRoot->injectExpressionContext(pExpCtx);
}
+ DocumentSource::GetModPathsReturn getModifiedPaths() const final {
+ // Replaces the entire root, so all paths are modified.
+ return {DocumentSource::GetModPathsReturn::Type::kAllPaths, std::set<std::string>{}};
+ }
+
// Create the replaceRoot transformer. Uasserts on invalid input.
static std::unique_ptr<ReplaceRootTransformation> create(const BSONElement& spec) {
diff --git a/src/mongo/db/pipeline/document_source_replace_root_test.cpp b/src/mongo/db/pipeline/document_source_replace_root_test.cpp
index 0e01d4ac7be..16e6c2f5fa0 100644
--- a/src/mongo/db/pipeline/document_source_replace_root_test.cpp
+++ b/src/mongo/db/pipeline/document_source_replace_root_test.cpp
@@ -270,6 +270,14 @@ TEST_F(ReplaceRootBasics, OnlyDependentFieldIsNewRoot) {
ASSERT_EQUALS(false, dependencies.getNeedTextScore());
}
+TEST_F(ReplaceRootBasics, ReplaceRootModifiesAllFields) {
+ auto replaceRoot = createReplaceRoot(BSON("newRoot"
+ << "$a"));
+ auto modifiedPaths = replaceRoot->getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kAllPaths);
+ ASSERT_EQUALS(0U, modifiedPaths.paths.size());
+}
+
/**
* Fixture to test error cases of initializing the $replaceRoot stage.
*/
diff --git a/src/mongo/db/pipeline/document_source_single_document_transformation.cpp b/src/mongo/db/pipeline/document_source_single_document_transformation.cpp
index 222c3ebebdf..953bcc81e53 100644
--- a/src/mongo/db/pipeline/document_source_single_document_transformation.cpp
+++ b/src/mongo/db/pipeline/document_source_single_document_transformation.cpp
@@ -76,7 +76,7 @@ Value DocumentSourceSingleDocumentTransformation::serialize(bool explain) const
return Value(Document{{getSourceName(), _parsedTransform->serialize(explain)}});
}
-Pipeline::SourceContainer::iterator DocumentSourceSingleDocumentTransformation::optimizeAt(
+Pipeline::SourceContainer::iterator DocumentSourceSingleDocumentTransformation::doOptimizeAt(
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
invariant(*itr == this);
auto nextSkip = dynamic_cast<DocumentSourceSkip*>((*std::next(itr)).get());
@@ -100,4 +100,9 @@ void DocumentSourceSingleDocumentTransformation::doInjectExpressionContext() {
_parsedTransform->injectExpressionContext(pExpCtx);
}
+DocumentSource::GetModPathsReturn DocumentSourceSingleDocumentTransformation::getModifiedPaths()
+ const {
+ return _parsedTransform->getModifiedPaths();
+}
+
} // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_skip.cpp b/src/mongo/db/pipeline/document_source_skip.cpp
index 137b9ac9483..e052c24895d 100644
--- a/src/mongo/db/pipeline/document_source_skip.cpp
+++ b/src/mongo/db/pipeline/document_source_skip.cpp
@@ -71,7 +71,7 @@ intrusive_ptr<DocumentSource> DocumentSourceSkip::optimize() {
return _nToSkip == 0 ? nullptr : this;
}
-Pipeline::SourceContainer::iterator DocumentSourceSkip::optimizeAt(
+Pipeline::SourceContainer::iterator DocumentSourceSkip::doOptimizeAt(
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
invariant(*itr == this);
diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp
index e25867f476b..156b0499d20 100644
--- a/src/mongo/db/pipeline/document_source_sort.cpp
+++ b/src/mongo/db/pipeline/document_source_sort.cpp
@@ -138,11 +138,10 @@ Document DocumentSourceSort::serializeSortKey(bool explain) const {
return keyObj.freeze();
}
-Pipeline::SourceContainer::iterator DocumentSourceSort::optimizeAt(
+Pipeline::SourceContainer::iterator DocumentSourceSort::doOptimizeAt(
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
invariant(*itr == this);
- auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get());
auto nextLimit = dynamic_cast<DocumentSourceLimit*>((*std::next(itr)).get());
if (nextLimit) {
@@ -150,11 +149,6 @@ Pipeline::SourceContainer::iterator DocumentSourceSort::optimizeAt(
setLimitSrc(nextLimit);
container->erase(std::next(itr));
return itr;
- } else if (nextMatch && !nextMatch->isTextQuery()) {
- // Swap the $match before the $sort, thus reducing the number of documents that pass into
- // this stage.
- std::swap(*itr, *std::next(itr));
- return itr == container->begin() ? itr : std::prev(itr);
}
return std::next(itr);
}
diff --git a/src/mongo/db/pipeline/document_source_sort_test.cpp b/src/mongo/db/pipeline/document_source_sort_test.cpp
index 04864c3cb2d..022b2040ba3 100644
--- a/src/mongo/db/pipeline/document_source_sort_test.cpp
+++ b/src/mongo/db/pipeline/document_source_sort_test.cpp
@@ -184,6 +184,13 @@ TEST_F(DocumentSourceSortTest, OutputSort) {
ASSERT_EQUALS(outputSort.size(), 2U);
}
+TEST_F(DocumentSourceSortTest, ReportsNoPathsModified) {
+ createSort(BSON("a" << 1 << "b.c" << -1));
+ auto modifiedPaths = sort()->getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet);
+ ASSERT_EQUALS(0U, modifiedPaths.paths.size());
+}
+
class DocumentSourceSortExecutionTest : public DocumentSourceSortTest {
public:
void checkResults(deque<DocumentSource::GetNextResult> inputDocs,
diff --git a/src/mongo/db/pipeline/document_source_unwind.cpp b/src/mongo/db/pipeline/document_source_unwind.cpp
index a03bd1e8034..02b8dc62355 100644
--- a/src/mongo/db/pipeline/document_source_unwind.cpp
+++ b/src/mongo/db/pipeline/document_source_unwind.cpp
@@ -232,43 +232,12 @@ BSONObjSet DocumentSourceUnwind::getOutputSorts() {
return out;
}
-Pipeline::SourceContainer::iterator DocumentSourceUnwind::optimizeAt(
- Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
- invariant(*itr == this);
-
- if (auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get())) {
- std::set<std::string> fields = {_unwindPath.fullPath()};
-
- if (_indexPath) {
- fields.insert((*_indexPath).fullPath());
- }
-
- auto splitMatch = nextMatch->splitSourceBy(fields);
-
- invariant(splitMatch.first || splitMatch.second);
-
- if (!splitMatch.first && splitMatch.second) {
- // No optimization was possible.
- return std::next(itr);
- }
-
- container->erase(std::next(itr));
-
- // If splitMatch.second is not null, then there is a new $match stage to insert after
- // ourselves.
- if (splitMatch.second) {
- container->insert(std::next(itr), std::move(splitMatch.second));
- }
-
- if (splitMatch.first) {
- container->insert(itr, std::move(splitMatch.first));
- if (std::prev(itr) == container->begin()) {
- return std::prev(itr);
- }
- return std::prev(std::prev(itr));
- }
+DocumentSource::GetModPathsReturn DocumentSourceUnwind::getModifiedPaths() const {
+ std::set<std::string> modifiedFields{_unwindPath.fullPath()};
+ if (_indexPath) {
+ modifiedFields.insert(_indexPath->fullPath());
}
- return std::next(itr);
+ return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedFields)};
}
Value DocumentSourceUnwind::serialize(bool explain) const {
diff --git a/src/mongo/db/pipeline/document_source_unwind_test.cpp b/src/mongo/db/pipeline/document_source_unwind_test.cpp
index 56ad90dca9a..a782d7b521f 100644
--- a/src/mongo/db/pipeline/document_source_unwind_test.cpp
+++ b/src/mongo/db/pipeline/document_source_unwind_test.cpp
@@ -717,6 +717,31 @@ TEST_F(UnwindStageTest, ShouldPropagatePauses) {
ASSERT_TRUE(unwind->getNext().isEOF());
}
+TEST_F(UnwindStageTest, UnwindOnlyModifiesUnwoundPathWhenNotIncludingIndex) {
+ const bool includeNullIfEmptyOrMissing = false;
+ const boost::optional<std::string> includeArrayIndex = boost::none;
+ auto unwind = DocumentSourceUnwind::create(
+ getExpCtx(), "array", includeNullIfEmptyOrMissing, includeArrayIndex);
+
+ auto modifiedPaths = unwind->getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet);
+ ASSERT_EQUALS(1U, modifiedPaths.paths.size());
+ ASSERT_EQUALS(1U, modifiedPaths.paths.count("array"));
+}
+
+TEST_F(UnwindStageTest, UnwindIncludesIndexPathWhenIncludingIndex) {
+ const bool includeNullIfEmptyOrMissing = false;
+ const boost::optional<std::string> includeArrayIndex = std::string("arrIndex");
+ auto unwind = DocumentSourceUnwind::create(
+ getExpCtx(), "array", includeNullIfEmptyOrMissing, includeArrayIndex);
+
+ auto modifiedPaths = unwind->getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet);
+ ASSERT_EQUALS(2U, modifiedPaths.paths.size());
+ ASSERT_EQUALS(1U, modifiedPaths.paths.count("array"));
+ ASSERT_EQUALS(1U, modifiedPaths.paths.count("arrIndex"));
+}
+
//
// Error cases.
//
diff --git a/src/mongo/db/pipeline/parsed_add_fields.h b/src/mongo/db/pipeline/parsed_add_fields.h
index 240b17faead..58b8445e727 100644
--- a/src/mongo/db/pipeline/parsed_add_fields.h
+++ b/src/mongo/db/pipeline/parsed_add_fields.h
@@ -96,6 +96,11 @@ public:
return DocumentSource::SEE_NEXT;
}
+ DocumentSource::GetModPathsReturn getModifiedPaths() const final {
+ // TODO SERVER-25510 Only report added paths as modified.
+ return {DocumentSource::GetModPathsReturn::Type::kAllPaths, std::set<std::string>{}};
+ }
+
/**
* Add the specified fields to 'inputDoc'.
*
diff --git a/src/mongo/db/pipeline/parsed_exclusion_projection.cpp b/src/mongo/db/pipeline/parsed_exclusion_projection.cpp
index 198a85e7a69..8226d146009 100644
--- a/src/mongo/db/pipeline/parsed_exclusion_projection.cpp
+++ b/src/mongo/db/pipeline/parsed_exclusion_projection.cpp
@@ -121,6 +121,16 @@ Value ExclusionNode::applyProjectionToValue(Value val) const {
}
}
+void ExclusionNode::addModifiedPaths(std::set<std::string>* modifiedPaths) const {
+ for (auto&& excludedField : _excludedFields) {
+ modifiedPaths->insert(FieldPath::getFullyQualifiedPath(_pathToNode, excludedField));
+ }
+
+ for (auto&& childPair : _children) {
+ childPair.second->addModifiedPaths(modifiedPaths);
+ }
+}
+
//
// ParsedExclusionProjection.
//
diff --git a/src/mongo/db/pipeline/parsed_exclusion_projection.h b/src/mongo/db/pipeline/parsed_exclusion_projection.h
index 18fb7dc1a60..ea7b25ac33f 100644
--- a/src/mongo/db/pipeline/parsed_exclusion_projection.h
+++ b/src/mongo/db/pipeline/parsed_exclusion_projection.h
@@ -71,6 +71,7 @@ public:
*/
ExclusionNode* addOrGetChild(FieldPath field);
+ void addModifiedPaths(std::set<std::string>* modifiedPaths) const;
private:
// Helpers for addOrGetChild above.
@@ -116,10 +117,16 @@ public:
*/
Document applyProjection(Document inputDoc) const final;
- DocumentSource::GetDepsReturn addDependencies(DepsTracker* deps) const {
+ DocumentSource::GetDepsReturn addDependencies(DepsTracker* deps) const final {
return DocumentSource::SEE_NEXT;
}
+ DocumentSource::GetModPathsReturn getModifiedPaths() const final {
+ std::set<std::string> modifiedPaths;
+ _root->addModifiedPaths(&modifiedPaths);
+ return {DocumentSource::GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths)};
+ }
+
private:
/**
* Helper for parse() above.
diff --git a/src/mongo/db/pipeline/parsed_exclusion_projection_test.cpp b/src/mongo/db/pipeline/parsed_exclusion_projection_test.cpp
index a9fb218d8ef..3673330dbfb 100644
--- a/src/mongo/db/pipeline/parsed_exclusion_projection_test.cpp
+++ b/src/mongo/db/pipeline/parsed_exclusion_projection_test.cpp
@@ -117,6 +117,29 @@ TEST(ExclusionProjection, ShouldNotAddAnyDependencies) {
ASSERT_FALSE(deps.getNeedTextScore());
}
+TEST(ExclusionProjection, ShouldReportExcludedFieldsAsModified) {
+ ParsedExclusionProjection exclusion;
+ exclusion.parse(BSON("_id" << false << "a" << false << "b.c" << false));
+
+ auto modifiedPaths = exclusion.getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet);
+ ASSERT_EQ(modifiedPaths.paths.count("_id"), 1UL);
+ ASSERT_EQ(modifiedPaths.paths.count("a"), 1UL);
+ ASSERT_EQ(modifiedPaths.paths.count("b.c"), 1UL);
+ ASSERT_EQ(modifiedPaths.paths.size(), 3UL);
+}
+
+TEST(ExclusionProjection, ShouldReportExcludedFieldsAsModifiedWhenSpecifiedAsNestedObj) {
+ ParsedExclusionProjection exclusion;
+ exclusion.parse(BSON("a" << BSON("b" << false << "c" << BSON("d" << false))));
+
+ auto modifiedPaths = exclusion.getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet);
+ ASSERT_EQ(modifiedPaths.paths.count("a.b"), 1UL);
+ ASSERT_EQ(modifiedPaths.paths.count("a.c.d"), 1UL);
+ ASSERT_EQ(modifiedPaths.paths.size(), 2UL);
+}
+
//
// Tests of execution of exclusions at the top level.
//
diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection.cpp b/src/mongo/db/pipeline/parsed_inclusion_projection.cpp
index 4842500cd90..77fccf8107f 100644
--- a/src/mongo/db/pipeline/parsed_inclusion_projection.cpp
+++ b/src/mongo/db/pipeline/parsed_inclusion_projection.cpp
@@ -236,6 +236,18 @@ InclusionNode* InclusionNode::addChild(string field) {
return insertedPair.first->second.get();
}
+void InclusionNode::addPreservedPaths(std::set<std::string>* preservedPaths) const {
+ // Only our inclusion paths are preserved. This inclusion node may also have paths with
+ // associated expressions, but those paths are modified and therefore are not considered
+ // "preserved".
+ for (auto&& includedField : _inclusions) {
+ preservedPaths->insert(FieldPath::getFullyQualifiedPath(_pathToNode, includedField));
+ }
+ for (auto&& childPair : _children) {
+ childPair.second->addPreservedPaths(preservedPaths);
+ }
+}
+
//
// ParsedInclusionProjection
//
diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection.h b/src/mongo/db/pipeline/parsed_inclusion_projection.h
index eb2bc1877bc..8cd04514b53 100644
--- a/src/mongo/db/pipeline/parsed_inclusion_projection.h
+++ b/src/mongo/db/pipeline/parsed_inclusion_projection.h
@@ -121,6 +121,11 @@ public:
void injectExpressionContext(const boost::intrusive_ptr<ExpressionContext>& expCtx);
+ /**
+ * Recursively add all paths that are preserved by this inclusion projection.
+ */
+ void addPreservedPaths(std::set<std::string>* preservedPaths) const;
+
private:
// Helpers for the Document versions above. These will apply the transformation recursively to
// each element of any arrays, and ensure non-documents are handled appropriately.
@@ -214,6 +219,12 @@ public:
return DocumentSource::EXHAUSTIVE_FIELDS;
}
+ DocumentSource::GetModPathsReturn getModifiedPaths() const final {
+ std::set<std::string> preservedPaths;
+ _root->addPreservedPaths(&preservedPaths);
+ return {DocumentSource::GetModPathsReturn::Type::kAllExcept, std::move(preservedPaths)};
+ }
+
/**
* Apply this exclusion projection to 'inputDoc'.
*
diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp b/src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp
index b2833638543..56da137715e 100644
--- a/src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp
+++ b/src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp
@@ -174,6 +174,48 @@ TEST(InclusionProjection, ShouldOptimizeNestedExpressions) {
ASSERT_DOCUMENT_EQ(expectedSerialization, inclusion.serialize(true));
}
+TEST(InclusionProjection, ShouldReportThatAllExceptIncludedFieldsAreModified) {
+ ParsedInclusionProjection inclusion;
+ inclusion.parse(BSON(
+ "a" << wrapInLiteral("computedVal") << "b.c" << wrapInLiteral("computedVal") << "d" << true
+ << "e.f"
+ << true));
+
+ auto modifiedPaths = inclusion.getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kAllExcept);
+ // Included paths are not modified.
+ ASSERT_EQ(modifiedPaths.paths.count("_id"), 1UL);
+ ASSERT_EQ(modifiedPaths.paths.count("d"), 1UL);
+ ASSERT_EQ(modifiedPaths.paths.count("e.f"), 1UL);
+ // Computed paths are modified.
+ ASSERT_EQ(modifiedPaths.paths.count("a"), 0UL);
+ ASSERT_EQ(modifiedPaths.paths.count("b.c"), 0UL);
+ ASSERT_EQ(modifiedPaths.paths.size(), 3UL);
+}
+
+TEST(InclusionProjection, ShouldReportThatAllExceptIncludedFieldsAreModifiedWithIdExclusion) {
+ ParsedInclusionProjection inclusion;
+ inclusion.parse(BSON("_id" << false << "a" << wrapInLiteral("computedVal") << "b.c"
+ << wrapInLiteral("computedVal")
+ << "d"
+ << true
+ << "e.f"
+ << true));
+
+ auto modifiedPaths = inclusion.getModifiedPaths();
+ ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kAllExcept);
+ // Included paths are not modified.
+ ASSERT_EQ(modifiedPaths.paths.count("d"), 1UL);
+ ASSERT_EQ(modifiedPaths.paths.count("e.f"), 1UL);
+ // Computed paths are modified.
+ ASSERT_EQ(modifiedPaths.paths.count("a"), 0UL);
+ ASSERT_EQ(modifiedPaths.paths.count("b.c"), 0UL);
+ // _id is explicitly excluded.
+ ASSERT_EQ(modifiedPaths.paths.count("_id"), 0UL);
+
+ ASSERT_EQ(modifiedPaths.paths.size(), 2UL);
+}
+
//
// Top-level only.
//
diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp
index 16625527383..315b0b10cfd 100644
--- a/src/mongo/db/pipeline/pipeline_test.cpp
+++ b/src/mongo/db/pipeline/pipeline_test.cpp
@@ -338,6 +338,7 @@ class LookupShouldSplitMatch : public Base {
" {$match: {asField: {$eq: 3}}}]";
}
};
+
class LookupShouldNotAbsorbMatchOnAs : public Base {
string inputPipeJson() {
return "[{$lookup: {from: 'foo', as: 'asField', localField: 'y', foreignField: 'z'}}, "
@@ -714,6 +715,134 @@ class GraphLookupShouldNotCoalesceWithUnwindNotOnAs : public Base {
}
};
+class GraphLookupShouldSwapWithMatch : public Base {
+ string inputPipeJson() {
+ return "[{$graphLookup: {"
+ " from: 'coll2',"
+ " as: 'results',"
+ " connectToField: 'to',"
+ " connectFromField: 'from',"
+ " startWith: '$startVal'"
+ " }},"
+ " {$match: {independent: 'x'}}"
+ "]";
+ }
+ string outputPipeJson() {
+ return "[{$match: {independent: 'x'}},"
+ " {$graphLookup: {"
+ " from: 'coll2',"
+ " as: 'results',"
+ " connectToField: 'to',"
+ " connectFromField: 'from',"
+ " startWith: '$startVal'"
+ " }}]";
+ }
+};
+
+class ExclusionProjectShouldSwapWithIndependentMatch : public Base {
+ string inputPipeJson() final {
+ return "[{$project: {redacted: 0}}, {$match: {unrelated: 4}}]";
+ }
+ string outputPipeJson() final {
+ return "[{$match: {unrelated: 4}}, {$project: {redacted: false}}]";
+ }
+};
+
+class ExclusionProjectShouldNotSwapWithMatchOnExcludedFields : public Base {
+ string inputPipeJson() final {
+ return "[{$project: {subdoc: {redacted: false}}}, {$match: {'subdoc.redacted': 4}}]";
+ }
+ string outputPipeJson() final {
+ return inputPipeJson();
+ }
+};
+
+class MatchShouldSplitIfPartIsIndependentOfExclusionProjection : public Base {
+ string inputPipeJson() final {
+ return "[{$project: {redacted: 0}},"
+ " {$match: {redacted: 'x', unrelated: 4}}]";
+ }
+ string outputPipeJson() final {
+ return "[{$match: {unrelated: {$eq: 4}}},"
+ " {$project: {redacted: false}},"
+ " {$match: {redacted: {$eq: 'x'}}}]";
+ }
+};
+
+class InclusionProjectShouldSwapWithIndependentMatch : public Base {
+ string inputPipeJson() final {
+ return "[{$project: {included: 1}}, {$match: {included: 4}}]";
+ }
+ string outputPipeJson() final {
+ return "[{$match: {included: 4}}, {$project: {_id: true, included: true}}]";
+ }
+};
+
+class InclusionProjectShouldNotSwapWithMatchOnFieldsNotIncluded : public Base {
+ string inputPipeJson() final {
+ return "[{$project: {_id: true, included: true, subdoc: {included: true}}},"
+ " {$match: {notIncluded: 'x', unrelated: 4}}]";
+ }
+ string outputPipeJson() final {
+ return inputPipeJson();
+ }
+};
+
+class MatchShouldSplitIfPartIsIndependentOfInclusionProjection : public Base {
+ string inputPipeJson() final {
+ return "[{$project: {_id: true, included: true}},"
+ " {$match: {included: 'x', unrelated: 4}}]";
+ }
+ string outputPipeJson() final {
+ return "[{$match: {included: {$eq: 'x'}}},"
+ " {$project: {_id: true, included: true}},"
+ " {$match: {unrelated: {$eq: 4}}}]";
+ }
+};
+
+class TwoMatchStagesShouldBothPushIndependentPartsBeforeProjection : public Base {
+ string inputPipeJson() final {
+ return "[{$project: {_id: true, included: true}},"
+ " {$match: {included: 'x', unrelated: 4}},"
+ " {$match: {included: 'y', unrelated: 5}}]";
+ }
+ string outputPipeJson() final {
+ return "[{$match: {$and: [{included: {$eq: 'x'}}, {included: {$eq: 'y'}}]}},"
+ " {$project: {_id: true, included: true}},"
+ " {$match: {$and: [{unrelated: {$eq: 4}}, {unrelated: {$eq: 5}}]}}]";
+ }
+};
+
+class NeighboringMatchesShouldCoalesce : public Base {
+ string inputPipeJson() final {
+ return "[{$match: {x: 'x'}},"
+ " {$match: {y: 'y'}}]";
+ }
+ string outputPipeJson() final {
+ return "[{$match: {$and: [{x: 'x'}, {y: 'y'}]}}]";
+ }
+};
+
+class MatchShouldNotSwapBeforeLimit : public Base {
+ string inputPipeJson() final {
+ return "[{$limit: 3},"
+ " {$match: {y: 'y'}}]";
+ }
+ string outputPipeJson() final {
+ return inputPipeJson();
+ }
+};
+
+class MatchShouldNotSwapBeforeSkip : public Base {
+ string inputPipeJson() final {
+ return "[{$skip: 3},"
+ " {$match: {y: 'y'}}]";
+ }
+ string outputPipeJson() final {
+ return inputPipeJson();
+ }
+};
+
} // namespace Local
namespace Sharded {
@@ -1324,6 +1453,7 @@ public:
add<Optimizations::Local::GraphLookupShouldCoalesceWithUnwindOnAsWithPreserveEmpty>();
add<Optimizations::Local::GraphLookupShouldCoalesceWithUnwindOnAsWithIncludeArrayIndex>();
add<Optimizations::Local::GraphLookupShouldNotCoalesceWithUnwindNotOnAs>();
+ add<Optimizations::Local::GraphLookupShouldSwapWithMatch>();
add<Optimizations::Local::MatchShouldDuplicateItselfBeforeRedact>();
add<Optimizations::Local::MatchShouldSwapWithUnwind>();
add<Optimizations::Local::MatchShouldNotOptimizeWhenMatchingOnIndexField>();
@@ -1333,6 +1463,16 @@ public:
add<Optimizations::Local::MatchWithOrDoesNotSplit>();
add<Optimizations::Local::MatchShouldSplitOnUnwind>();
add<Optimizations::Local::UnwindBeforeDoubleMatchShouldRepeatedlyOptimize>();
+ add<Optimizations::Local::ExclusionProjectShouldSwapWithIndependentMatch>();
+ add<Optimizations::Local::ExclusionProjectShouldNotSwapWithMatchOnExcludedFields>();
+ add<Optimizations::Local::MatchShouldSplitIfPartIsIndependentOfExclusionProjection>();
+ add<Optimizations::Local::InclusionProjectShouldSwapWithIndependentMatch>();
+ add<Optimizations::Local::InclusionProjectShouldNotSwapWithMatchOnFieldsNotIncluded>();
+ add<Optimizations::Local::MatchShouldSplitIfPartIsIndependentOfInclusionProjection>();
+ add<Optimizations::Local::TwoMatchStagesShouldBothPushIndependentPartsBeforeProjection>();
+ add<Optimizations::Local::NeighboringMatchesShouldCoalesce>();
+ add<Optimizations::Local::MatchShouldNotSwapBeforeLimit>();
+ add<Optimizations::Local::MatchShouldNotSwapBeforeSkip>();
add<Optimizations::Sharded::Empty>();
add<Optimizations::Sharded::coalesceLookUpAndUnwind::ShouldCoalesceUnwindOnAs>();
add<Optimizations::Sharded::coalesceLookUpAndUnwind::