diff options
author | David Storch <david.storch@10gen.com> | 2016-10-03 11:48:27 -0400 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2016-10-06 11:19:24 -0400 |
commit | 4976eb401b3f16482fb35287ea60d7e8b33fe76d (patch) | |
tree | 3ab5cfaec3f76dc334b006f0b1eeb7162772d4d6 | |
parent | a2d34afd4330258e05c5ac1e9174338cfda29a10 (diff) | |
download | mongo-4976eb401b3f16482fb35287ea60d7e8b33fe76d.tar.gz |
SERVER-19153 push $match as far forward as possible
30 files changed, 853 insertions, 196 deletions
diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h index c8747acac33..c4316213827 100644 --- a/src/mongo/db/matcher/expression_algo.h +++ b/src/mongo/db/matcher/expression_algo.h @@ -28,6 +28,12 @@ * it in the license file. */ +#pragma once + +#include <memory> +#include <set> + +#include "mongo/base/string_data.h" #include "mongo/stdx/functional.h" namespace mongo { diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp index e8fe53ddc93..c97d766679a 100644 --- a/src/mongo/db/matcher/expression_algo_test.cpp +++ b/src/mongo/db/matcher/expression_algo_test.cpp @@ -906,6 +906,28 @@ TEST(SplitMatchExpression, ComplexMatchExpressionSplitsCorrectly) { "{$eq: 1}}]}]}, {y: {$eq: 3}}]}]}]}")); } +TEST(SplitMatchExpression, ShouldNotExtractPrefixOfDottedPathAsIndependent) { + BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {'a.b': 1}, {'a.c': 1}]}"); + const CollatorInterface* collator = nullptr; + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator); + ASSERT_OK(status.getStatus()); + + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = + expression::splitMatchExpressionBy(std::move(status.getValue()), {"a.b"}); + + ASSERT_TRUE(splitExpr.first.get()); + BSONObjBuilder firstBob; + splitExpr.first->serialize(&firstBob); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder secondBob; + splitExpr.second->serialize(&secondBob); + + ASSERT_BSONOBJ_EQ(firstBob.obj(), fromjson("{'a.c': {$eq: 1}}")); + ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{$and: [{a: {$eq: 1}}, {'a.b': {$eq: 1}}]}")); +} + TEST(MapOverMatchExpression, DoesMapOverLogicalNodes) { BSONObj matchPredicate = fromjson("{a: {$not: {$eq: 1}}}"); const CollatorInterface* collator = nullptr; diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp index ae456bf035d..a78c307fad3 100644 --- a/src/mongo/db/pipeline/document_source.cpp +++ b/src/mongo/db/pipeline/document_source.cpp @@ -28,6 +28,7 @@ #include "mongo/platform/basic.h" +#include "mongo/db/matcher/expression_algo.h" #include "mongo/db/pipeline/document_source.h" #include "mongo/db/pipeline/expression_context.h" #include "mongo/db/pipeline/value.h" @@ -88,6 +89,107 @@ intrusive_ptr<DocumentSource> DocumentSource::optimize() { return this; } +namespace { + +/** + * Given a set of paths 'dependencies', determines which of those paths will be modified if all + * paths except those in 'preservedPaths' are modified. + * + * For example, extractModifiedDependencies({'a', 'b', 'c.d', 'e'}, {'a', 'b.c', c'}) returns + * {'b', 'e'}, since 'b' and 'e' are not preserved (only 'b.c' is preserved). + */ +std::set<std::string> extractModifiedDependencies(const std::set<std::string>& dependencies, + const std::set<std::string>& preservedPaths) { + std::set<std::string> modifiedDependencies; + + // The modified dependencies is *almost* the set difference 'dependencies' - 'preservedPaths', + // except that if p in 'preservedPaths' is a "path prefix" of d in 'dependencies', then 'd' + // should not be included in the modified dependencies. + for (auto&& dependency : dependencies) { + bool preserved = false; + auto depAsPath = FieldPath(dependency); + auto firstField = depAsPath.getFieldName(0); + // If even a prefix is preserved, the path is preserved, so search for any prefixes of + // 'dependency' as well. 'preservedPaths' is an *ordered* set, so we only have to search the + // range ['firstField', 'dependency'] to find any prefixes of 'dependency'. + for (auto it = preservedPaths.lower_bound(firstField); + it != preservedPaths.upper_bound(dependency); + ++it) { + if (*it == dependency || expression::isPathPrefixOf(*it, dependency)) { + preserved = true; + break; + } + } + if (!preserved) { + modifiedDependencies.insert(dependency); + } + } + return modifiedDependencies; +} + +/** + * Returns a pair of pointers to $match stages, either of which can be null. The first entry in the + * pair is a $match stage that can be moved before this stage, the second is a $match stage that + * must remain after this stage. + */ +std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>> +splitMatchByModifiedFields(const boost::intrusive_ptr<DocumentSourceMatch>& match, + const DocumentSource::GetModPathsReturn& modifiedPathsRet) { + // Attempt to move some or all of this $match before this stage. + std::set<std::string> modifiedPaths; + switch (modifiedPathsRet.type) { + case DocumentSource::GetModPathsReturn::Type::kNotSupported: + // We don't know what paths this stage might modify, so refrain from swapping. + return {nullptr, match}; + case DocumentSource::GetModPathsReturn::Type::kAllPaths: + // This stage modifies all paths, so cannot be swapped with a $match at all. + return {nullptr, match}; + case DocumentSource::GetModPathsReturn::Type::kFiniteSet: + modifiedPaths = std::move(modifiedPathsRet.paths); + break; + case DocumentSource::GetModPathsReturn::Type::kAllExcept: { + DepsTracker depsTracker; + match->getDependencies(&depsTracker); + modifiedPaths = extractModifiedDependencies(depsTracker.fields, modifiedPathsRet.paths); + } + } + return match->splitSourceBy(modifiedPaths); +} + +} // namespace + +Pipeline::SourceContainer::iterator DocumentSource::optimizeAt( + Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { + invariant(*itr == this && (std::next(itr) != container->end())); + auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get()); + if (canSwapWithMatch() && nextMatch && !nextMatch->isTextQuery()) { + // We're allowed to swap with a $match and the stage after us is a $match. Furthermore, the + // $match does not contain a text search predicate, which we do not attempt to optimize + // because such a $match must already be the first stage in the pipeline. We can attempt to + // swap the $match or part of the $match before ourselves. + auto splitMatch = splitMatchByModifiedFields(nextMatch, getModifiedPaths()); + invariant(splitMatch.first || splitMatch.second); + + if (splitMatch.first) { + // At least part of the $match can be moved before this stage. Erase the original $match + // and put the independent part before this stage. If splitMatch.second is not null, + // then there is a new $match stage to insert after ourselves which is dependent on the + // modified fields. + container->erase(std::next(itr)); + container->insert(itr, std::move(splitMatch.first)); + if (splitMatch.second) { + container->insert(std::next(itr), std::move(splitMatch.second)); + } + + // The stage before the new $match may be able to optimize further, if there is such a + // stage. + return std::prev(itr) == container->begin() ? std::prev(itr) + : std::prev(std::prev(itr)); + } + } + return doOptimizeAt(itr, container); +} + void DocumentSource::dispose() { if (pSource) { pSource->dispose(); diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h index 2d780df649c..d5d3c02f07b 100644 --- a/src/mongo/db/pipeline/document_source.h +++ b/src/mongo/db/pipeline/document_source.h @@ -255,57 +255,6 @@ public: virtual void setSource(DocumentSource* pSource); /** - * Gets a BSONObjSet representing the sort order(s) of the output of the stage. - */ - virtual BSONObjSet getOutputSorts() { - return SimpleBSONObjComparator::kInstance.makeBSONObjSet(); - } - - /** - * Returns an optimized DocumentSource that is semantically equivalent to this one, or - * nullptr if this stage is a no-op. Implementations are allowed to modify themselves - * in-place and return a pointer to themselves. For best results, first optimize the pipeline - * with the optimizePipeline() method defined in pipeline.cpp. - * - * This is intended for any operations that include expressions, and provides a hook for - * those to optimize those operations. - * - * The default implementation is to do nothing and return yourself. - */ - virtual boost::intrusive_ptr<DocumentSource> optimize(); - - /** - * Attempt to perform an optimization with the following source in the pipeline. 'container' - * refers to the entire pipeline, and 'itr' points to this stage within the pipeline. - * - * The return value is an iterator over the same container which points to the first location - * in the container at which an optimization may be possible. - * - * For example, if a swap takes place, the returned iterator should just be the position - * directly preceding 'itr', if such a position exists, since the stage at that position may be - * able to perform further optimizations with its new neighbor. - */ - virtual Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr, - Pipeline::SourceContainer* container) { - return std::next(itr); - }; - - enum GetDepsReturn { - NOT_SUPPORTED = 0x0, // The full object and all metadata may be required - SEE_NEXT = 0x1, // Later stages could need either fields or metadata - EXHAUSTIVE_FIELDS = 0x2, // Later stages won't need more fields from input - EXHAUSTIVE_META = 0x4, // Later stages won't need more metadata from input - EXHAUSTIVE_ALL = EXHAUSTIVE_FIELDS | EXHAUSTIVE_META, // Later stages won't need either - }; - - /** - * Get the dependencies this operation needs to do its job. - */ - virtual GetDepsReturn getDependencies(DepsTracker* deps) const { - return NOT_SUPPORTED; - } - - /** * In the default case, serializes the DocumentSource and adds it to the std::vector<Value>. * * A subclass may choose to overwrite this, rather than serialize, @@ -378,6 +327,134 @@ public: */ static BSONObjSet truncateSortSet(const BSONObjSet& sorts, const std::set<std::string>& fields); + // + // Optimization API - These methods give each DocumentSource an opportunity to apply any local + // optimizations, and to provide any rule-based optimizations to swap with or absorb subsequent + // stages. + // + + /** + * The non-virtual public interface for optimization. Attempts to do some generic optimizations + * such as pushing $matches as early in the pipeline as possible, then calls out to + * doOptimizeAt() for stage-specific optimizations. + * + * Subclasses should override doOptimizeAt() if they can apply some optimization(s) based on + * subsequent stages in the pipeline. + */ + Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container); + + /** + * Returns an optimized DocumentSource that is semantically equivalent to this one, or + * nullptr if this stage is a no-op. Implementations are allowed to modify themselves + * in-place and return a pointer to themselves. For best results, first optimize the pipeline + * with the optimizePipeline() method defined in pipeline.cpp. + * + * This is intended for any operations that include expressions, and provides a hook for + * those to optimize those operations. + * + * The default implementation is to do nothing and return yourself. + */ + virtual boost::intrusive_ptr<DocumentSource> optimize(); + + // + // Property Analysis - These methods allow a DocumentSource to expose information about + // properties of themselves, such as which fields they need to apply their transformations, and + // whether or not they produce or preserve a sort order. + // + // Property analysis can be useful during optimization (e.g. analysis of sort orders determines + // whether or not a blocking group can be upgraded to a streaming group). + // + + /** + * Gets a BSONObjSet representing the sort order(s) of the output of the stage. + */ + virtual BSONObjSet getOutputSorts() { + return SimpleBSONObjComparator::kInstance.makeBSONObjSet(); + } + + struct GetModPathsReturn { + enum class Type { + // No information is available about which paths are modified. + kNotSupported, + + // All fields will be modified. This should be used by stages like $replaceRoot which + // modify the entire document. + kAllPaths, + + // A finite set of paths will be modified by this stage. This is true for something like + // {$project: {a: 0, b: 0}}, which will only modify 'a' and 'b', and leave all other + // paths unmodified. + kFiniteSet, + + // This stage will modify an infinite set of paths, but we know which paths it will not + // modify. For example, the stage {$project: {_id: 1, a: 1}} will leave only the fields + // '_id' and 'a' unmodified, but all other fields will be projected out. + kAllExcept, + }; + + GetModPathsReturn(Type type, std::set<std::string>&& paths) + : type(type), paths(std::move(paths)) {} + + Type type; + std::set<std::string> paths; + }; + + /** + * Returns information about which paths are added, removed, or updated by this stage. The + * default implementation uses kNotSupported to indicate that the set of modified paths for this + * stage is not known. + * + * See GetModPathsReturn above for the possible return values and what they mean. + */ + virtual GetModPathsReturn getModifiedPaths() const { + return {GetModPathsReturn::Type::kNotSupported, std::set<std::string>{}}; + } + + /** + * Returns whether this stage can swap with a subsequent $match stage, provided that the match + * does not depend on the paths returned by getModifiedPaths(). + * + * Subclasses which want to participate in match swapping should override this to return true. + * Such a subclass must also override getModifiedPaths() to provide information about which + * $match predicates be swapped before itself. + */ + virtual bool canSwapWithMatch() const { + return false; + } + + enum GetDepsReturn { + // The full object and all metadata may be required. + NOT_SUPPORTED = 0x0, + + // Later stages could need either fields or metadata. For example, a $limit stage will pass + // through all fields, and they may or may not be needed by future stages. + SEE_NEXT = 0x1, + + // Later stages won't need more fields from input. For example, an inclusion projection like + // {_id: 1, a: 1} will only output two fields, so future stages cannot possibly depend on + // any other fields. + EXHAUSTIVE_FIELDS = 0x2, + + // Later stages won't need more metadata from input. For example, a $group stage will group + // documents together, discarding their text score. + EXHAUSTIVE_META = 0x4, + + // Later stages won't need either fields or metadata. + EXHAUSTIVE_ALL = EXHAUSTIVE_FIELDS | EXHAUSTIVE_META, + }; + + /** + * Get the dependencies this operation needs to do its job. If overridden, subclasses must add + * all paths needed to apply their transformation to 'deps->fields', and call + * 'deps->setNeedTextScore()' if the text score is required. + * + * See GetDepsReturn above for the possible return values and what they mean. + */ + virtual GetDepsReturn getDependencies(DepsTracker* deps) const { + return NOT_SUPPORTED; + } + protected: /** Base constructor. @@ -393,6 +470,24 @@ protected: */ virtual void doInjectExpressionContext() {} + /** + * Attempt to perform an optimization with the following source in the pipeline. 'container' + * refers to the entire pipeline, and 'itr' points to this stage within the pipeline. The caller + * must guarantee that std::next(itr) != container->end(). + * + * The return value is an iterator over the same container which points to the first location + * in the container at which an optimization may be possible. + * + * For example, if a swap takes place, the returned iterator should just be the position + * directly preceding 'itr', if such a position exists, since the stage at that position may be + * able to perform further optimizations with its new neighbor. + */ + virtual Pipeline::SourceContainer::iterator doOptimizeAt( + Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { + return std::next(itr); + }; + + /* Most DocumentSources have an underlying source they get their data from. This is a convenience for them. @@ -577,8 +672,8 @@ public: /** * Attempts to combine with any subsequent $limit stages by setting the internal '_limit' field. */ - Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr, - Pipeline::SourceContainer* container) final; + Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) final; Value serialize(bool explain = false) const final; bool isValidInitialSource() const final { return true; @@ -879,12 +974,13 @@ public: return pSource ? pSource->getOutputSorts() : SimpleBSONObjComparator::kInstance.makeBSONObjSet(); } + /** * Attempts to combine with any subsequent $match stages, joining the query objects with a * $and. */ - Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr, - Pipeline::SourceContainer* container) final; + Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) final; void setSource(DocumentSource* Source) final; GetDepsReturn getDependencies(DepsTracker* deps) const final; @@ -947,7 +1043,7 @@ public: * For example, {$match: {a: "foo", "b.c": 4}} split by "b" will return pointers to two stages: * {$match: {a: "foo"}}, and {$match: {"b.c": 4}}. */ - std::pair<boost::intrusive_ptr<DocumentSource>, boost::intrusive_ptr<DocumentSource>> + std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>> splitSourceBy(const std::set<std::string>& fields); /** @@ -1143,6 +1239,7 @@ public: virtual DocumentSource::GetDepsReturn addDependencies(DepsTracker* deps) const = 0; virtual void injectExpressionContext( const boost::intrusive_ptr<ExpressionContext>& pExpCtx) = 0; + virtual GetModPathsReturn getModifiedPaths() const = 0; }; DocumentSourceSingleDocumentTransformation( @@ -1156,10 +1253,15 @@ public: boost::intrusive_ptr<DocumentSource> optimize() final; void dispose() final; Value serialize(bool explain) const final; - Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr, - Pipeline::SourceContainer* container) final; + Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) final; void doInjectExpressionContext() final; DocumentSource::GetDepsReturn getDependencies(DepsTracker* deps) const final; + GetModPathsReturn getModifiedPaths() const final; + + bool canSwapWithMatch() const final { + return true; + } private: // Stores transformation logic. @@ -1250,8 +1352,8 @@ public: * Attempts to duplicate the redact-safe portion of a subsequent $match before the $redact * stage. */ - Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr, - Pipeline::SourceContainer* container) final; + Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) final; void doInjectExpressionContext() final; @@ -1387,8 +1489,8 @@ public: /** * Attempts to combine with a subsequent $limit stage, setting 'limit' appropriately. */ - Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr, - Pipeline::SourceContainer* container) final; + Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) final; Value serialize(bool explain = false) const final; GetDepsReturn getDependencies(DepsTracker* deps) const final { @@ -1450,17 +1552,24 @@ public: const char* getSourceName() const final; void serializeToArray(std::vector<Value>& array, bool explain = false) const final; + GetModPathsReturn getModifiedPaths() const final { + // A $sort does not modify any paths. + return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}}; + } + + bool canSwapWithMatch() const final { + return true; + } + BSONObjSet getOutputSorts() final { return allPrefixes(_sort); } /** - * Attempts to move a subsequent $match stage before the $sort, reducing the number of - * documents that pass through the stage. Also attempts to absorb a subsequent $limit stage so - * that it an perform a top-k sort. + * Attempts to absorb a subsequent $limit stage so that it an perform a top-k sort. */ - Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr, - Pipeline::SourceContainer* container) final; + Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) final; void dispose() final; GetDepsReturn getDependencies(DepsTracker* deps) const final; @@ -1601,8 +1710,8 @@ public: * Attempts to move a subsequent $limit before the skip, potentially allowing for forther * optimizations earlier in the pipeline. */ - Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr, - Pipeline::SourceContainer* container) final; + Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) final; Value serialize(bool explain = false) const final; boost::intrusive_ptr<DocumentSource> optimize() final; BSONObjSet getOutputSorts() final { @@ -1661,14 +1770,16 @@ public: Value serialize(bool explain = false) const final; BSONObjSet getOutputSorts() final; - GetDepsReturn getDependencies(DepsTracker* deps) const final; - /** - * If the next stage is a $match, the part of the match that is not dependent on the unwound - * field can be moved into a new, preceding, $match stage. + * Returns the unwound path, and the 'includeArrayIndex' path, if specified. */ - Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr, - Pipeline::SourceContainer* container) final; + GetModPathsReturn getModifiedPaths() const final; + + bool canSwapWithMatch() const final { + return true; + } + + GetDepsReturn getDependencies(DepsTracker* deps) const final; /** * Creates a new $unwind DocumentSource from a BSON specification. @@ -1725,8 +1836,8 @@ public: * Attempts to combine with a subsequent limit stage, setting the internal limit field * as a result. */ - Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr, - Pipeline::SourceContainer* container) final; + Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) final; bool isValidInitialSource() const final { return true; } @@ -1797,11 +1908,20 @@ public: void serializeToArray(std::vector<Value>& array, bool explain = false) const final; /** + * Returns the 'as' path, and possibly fields modified by an absorbed $unwind. + */ + GetModPathsReturn getModifiedPaths() const final; + + bool canSwapWithMatch() const final { + return true; + } + + /** * Attempts to combine with a subsequent $unwind stage, setting the internal '_unwindSrc' * field. */ - Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr, - Pipeline::SourceContainer* container) final; + Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) final; GetDepsReturn getDependencies(DepsTracker* deps) const final; void dispose() final; @@ -1906,10 +2026,19 @@ public: void serializeToArray(std::vector<Value>& array, bool explain = false) const final; /** + * Returns the 'as' path, and possibly the fields modified by an absorbed $unwind. + */ + GetModPathsReturn getModifiedPaths() const final; + + bool canSwapWithMatch() const final { + return true; + } + + /** * Attempts to combine with a subsequent $unwind stage, setting the internal '_unwind' field. */ - Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr, - Pipeline::SourceContainer* container) final; + Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) final; GetDepsReturn getDependencies(DepsTracker* deps) const final { _startWith->addDependencies(deps); diff --git a/src/mongo/db/pipeline/document_source_cursor.cpp b/src/mongo/db/pipeline/document_source_cursor.cpp index 132d6546235..e22e72c1ede 100644 --- a/src/mongo/db/pipeline/document_source_cursor.cpp +++ b/src/mongo/db/pipeline/document_source_cursor.cpp @@ -147,7 +147,7 @@ long long DocumentSourceCursor::getLimit() const { return _limit ? _limit->getLimit() : -1; } -Pipeline::SourceContainer::iterator DocumentSourceCursor::optimizeAt( +Pipeline::SourceContainer::iterator DocumentSourceCursor::doOptimizeAt( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { invariant(*itr == this); diff --git a/src/mongo/db/pipeline/document_source_geo_near.cpp b/src/mongo/db/pipeline/document_source_geo_near.cpp index 62914105716..1dd78b75a09 100644 --- a/src/mongo/db/pipeline/document_source_geo_near.cpp +++ b/src/mongo/db/pipeline/document_source_geo_near.cpp @@ -71,7 +71,7 @@ DocumentSource::GetNextResult DocumentSourceGeoNear::getNext() { return output.freeze(); } -Pipeline::SourceContainer::iterator DocumentSourceGeoNear::optimizeAt( +Pipeline::SourceContainer::iterator DocumentSourceGeoNear::doOptimizeAt( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { invariant(*itr == this); diff --git a/src/mongo/db/pipeline/document_source_graph_lookup.cpp b/src/mongo/db/pipeline/document_source_graph_lookup.cpp index 2e8b5131d01..7b6d6c0b22a 100644 --- a/src/mongo/db/pipeline/document_source_graph_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_graph_lookup.cpp @@ -374,7 +374,18 @@ void DocumentSourceGraphLookUp::performSearch() { doBreadthFirstSearch(); } -Pipeline::SourceContainer::iterator DocumentSourceGraphLookUp::optimizeAt( +DocumentSource::GetModPathsReturn DocumentSourceGraphLookUp::getModifiedPaths() const { + std::set<std::string> modifiedPaths{_as.fullPath()}; + if (_unwind) { + auto pathsModifiedByUnwind = _unwind.get()->getModifiedPaths(); + invariant(pathsModifiedByUnwind.type == GetModPathsReturn::Type::kFiniteSet); + modifiedPaths.insert(pathsModifiedByUnwind.paths.begin(), + pathsModifiedByUnwind.paths.end()); + } + return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths)}; +} + +Pipeline::SourceContainer::iterator DocumentSourceGraphLookUp::doOptimizeAt( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { invariant(*itr == this); diff --git a/src/mongo/db/pipeline/document_source_graph_lookup_test.cpp b/src/mongo/db/pipeline/document_source_graph_lookup_test.cpp index 6f74a0d6280..ac25e48936e 100644 --- a/src/mongo/db/pipeline/document_source_graph_lookup_test.cpp +++ b/src/mongo/db/pipeline/document_source_graph_lookup_test.cpp @@ -389,5 +389,52 @@ TEST_F(DocumentSourceGraphLookUpTest, ShouldPropagatePausesWhileUnwinding) { ASSERT_TRUE(graphLookupStage->getNext().isEOF()); } +TEST_F(DocumentSourceGraphLookUpTest, GraphLookupShouldReportAsFieldIsModified) { + auto expCtx = getExpCtx(); + NamespaceString fromNs("test", "foreign"); + expCtx->resolvedNamespaces[fromNs.coll()] = {fromNs, std::vector<BSONObj>{}}; + auto graphLookupStage = + DocumentSourceGraphLookUp::create(expCtx, + fromNs, + "results", + "from", + "to", + ExpressionFieldPath::create("startPoint"), + boost::none, + boost::none, + boost::none, + boost::none); + + auto modifiedPaths = graphLookupStage->getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet); + ASSERT_EQ(1U, modifiedPaths.paths.size()); + ASSERT_EQ(1U, modifiedPaths.paths.count("results")); +} + +TEST_F(DocumentSourceGraphLookUpTest, GraphLookupShouldReportFieldsModifiedByAbsorbedUnwind) { + auto expCtx = getExpCtx(); + NamespaceString fromNs("test", "foreign"); + expCtx->resolvedNamespaces[fromNs.coll()] = {fromNs, std::vector<BSONObj>{}}; + auto unwindStage = + DocumentSourceUnwind::create(expCtx, "results", false, std::string("arrIndex")); + auto graphLookupStage = + DocumentSourceGraphLookUp::create(expCtx, + fromNs, + "results", + "from", + "to", + ExpressionFieldPath::create("startPoint"), + boost::none, + boost::none, + boost::none, + unwindStage); + + auto modifiedPaths = graphLookupStage->getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet); + ASSERT_EQ(2U, modifiedPaths.paths.size()); + ASSERT_EQ(1U, modifiedPaths.paths.count("results")); + ASSERT_EQ(1U, modifiedPaths.paths.count("arrIndex")); +} + } // namespace } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_limit.cpp b/src/mongo/db/pipeline/document_source_limit.cpp index cdc3ed2d0a6..75ca17eca48 100644 --- a/src/mongo/db/pipeline/document_source_limit.cpp +++ b/src/mongo/db/pipeline/document_source_limit.cpp @@ -53,7 +53,7 @@ const char* DocumentSourceLimit::getSourceName() const { return "$limit"; } -Pipeline::SourceContainer::iterator DocumentSourceLimit::optimizeAt( +Pipeline::SourceContainer::iterator DocumentSourceLimit::doOptimizeAt( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { invariant(*itr == this); diff --git a/src/mongo/db/pipeline/document_source_lookup.cpp b/src/mongo/db/pipeline/document_source_lookup.cpp index 4607d9a28e8..7a0727bcf45 100644 --- a/src/mongo/db/pipeline/document_source_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_lookup.cpp @@ -159,7 +159,18 @@ DocumentSource::GetNextResult DocumentSourceLookUp::getNext() { return output.freeze(); } -Pipeline::SourceContainer::iterator DocumentSourceLookUp::optimizeAt( +DocumentSource::GetModPathsReturn DocumentSourceLookUp::getModifiedPaths() const { + std::set<std::string> modifiedPaths{_as.fullPath()}; + if (_unwindSrc) { + auto pathsModifiedByUnwind = _unwindSrc->getModifiedPaths(); + invariant(pathsModifiedByUnwind.type == GetModPathsReturn::Type::kFiniteSet); + modifiedPaths.insert(pathsModifiedByUnwind.paths.begin(), + pathsModifiedByUnwind.paths.end()); + } + return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths)}; +} + +Pipeline::SourceContainer::iterator DocumentSourceLookUp::doOptimizeAt( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { invariant(*itr == this); @@ -174,52 +185,13 @@ Pipeline::SourceContainer::iterator DocumentSourceLookUp::optimizeAt( return itr; } + // Attempt to internalize any predicates of a $match upon the "_as" field. auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get()); if (!nextMatch) { return std::next(itr); } - // Attempt to move part of the $match before ourselves, and internalize any predicates upon the - // "_as" field. - std::string outputPath = _as.fullPath(); - - std::set<std::string> fields = {outputPath}; - if (_handlingUnwind && _unwindSrc->indexPath()) { - fields.insert((*_unwindSrc->indexPath()).fullPath()); - } - - // Attempt to split the $match, putting the independent portion before ourselves. - auto splitMatch = nextMatch->splitSourceBy(fields); - - // Remove the original match from the pipeline. - container->erase(std::next(itr)); - - auto independent = dynamic_cast<DocumentSourceMatch*>(splitMatch.first.get()); - auto dependent = dynamic_cast<DocumentSourceMatch*>(splitMatch.second.get()); - - invariant(independent || dependent); - - auto locationOfNextPossibleOptimization = std::next(itr); - if (independent) { - // If the $match has an independent portion, insert it before ourselves. Keep track of where - // the pipeline should check for the next possible optimization. - container->insert(itr, std::move(independent)); - if (std::prev(itr) == container->begin()) { - locationOfNextPossibleOptimization = std::prev(itr); - } else { - locationOfNextPossibleOptimization = std::prev(std::prev(itr)); - } - } - - if (!dependent) { - // Nothing left to do; the entire $match was moved before us. - return locationOfNextPossibleOptimization; - } - - // Part of the $match was dependent upon us; we must now determine if we need to split the - // $match again to obtain a $match that is a predicate only upon the "_as" path. - if (!_handlingUnwind || _unwindSrc->indexPath() || _unwindSrc->preserveNullAndEmptyArrays()) { // We must be unwinding our result to internalize a $match. For example, consider the // following pipeline: @@ -248,10 +220,18 @@ Pipeline::SourceContainer::iterator DocumentSourceLookUp::optimizeAt( // In addition, we must avoid internalizing a $match if an absorbed $unwind has an // "includeArrayIndex" option, since the $match will alter the indices of the returned // values. - container->insert(std::next(itr), std::move(splitMatch.second)); - return locationOfNextPossibleOptimization; + return std::next(itr); } + auto outputPath = _as.fullPath(); + + // Since $match splitting is handled in a generic way, we expect to have already swapped + // portions of the $match that do not depend on the 'as' path or on an internalized $unwind's + // index path before ourselves. But due to the early return above, we know there is no + // internalized $unwind with an index path. + // + // Therefore, 'nextMatch' should only depend on the 'as' path. We now try to absorb the match on + // the 'as' path in order to push down these predicates into the foreign collection. bool isMatchOnlyOnAs = true; auto computeWhetherMatchOnAs = [&isMatchOnlyOnAs, &outputPath](MatchExpression* expression, std::string path) -> void { @@ -271,24 +251,26 @@ Pipeline::SourceContainer::iterator DocumentSourceLookUp::optimizeAt( } }; - expression::mapOver(dependent->getMatchExpression(), computeWhetherMatchOnAs); + expression::mapOver(nextMatch->getMatchExpression(), computeWhetherMatchOnAs); if (!isMatchOnlyOnAs) { - // "dependent" is not wholly a predicate upon our "_as" field. We must put it back into the - // pipeline as-is. - container->insert(std::next(itr), std::move(splitMatch.second)); - return locationOfNextPossibleOptimization; + // "nextMatch" does not contain any predicates that can be absorbed into this stage. + return std::next(itr); } - // We can internalize the entire $match. + // We can internalize the $match. if (!_handlingMatch) { - _matchSrc = dependent; + _matchSrc = nextMatch; _handlingMatch = true; } else { // We have already absorbed a $match. We need to join it with 'dependent'. - _matchSrc->joinMatchWith(dependent); + _matchSrc->joinMatchWith(nextMatch); } - return locationOfNextPossibleOptimization; + + // Remove the original $match. There may be further optimization between this $lookup and the + // new neighbor, so we return an iterator pointing to ourself. + container->erase(std::next(itr)); + return itr; } void DocumentSourceLookUp::dispose() { diff --git a/src/mongo/db/pipeline/document_source_lookup_test.cpp b/src/mongo/db/pipeline/document_source_lookup_test.cpp index c42eebdb2f1..229b50906e9 100644 --- a/src/mongo/db/pipeline/document_source_lookup_test.cpp +++ b/src/mongo/db/pipeline/document_source_lookup_test.cpp @@ -269,5 +269,53 @@ TEST_F(DocumentSourceLookUpTest, ShouldPropagatePausesWhileUnwinding) { ASSERT_TRUE(lookup->getNext().isEOF()); } +TEST_F(DocumentSourceLookUpTest, LookupReportsAsFieldIsModified) { + auto expCtx = getExpCtx(); + NamespaceString fromNs("test", "foreign"); + expCtx->resolvedNamespaces[fromNs.coll()] = {fromNs, std::vector<BSONObj>{}}; + + // Set up the $lookup stage. + auto lookupSpec = Document{{"$lookup", + Document{{"from", fromNs.coll()}, + {"localField", "foreignId"}, + {"foreignField", "_id"}, + {"as", "foreignDocs"}}}} + .toBson(); + auto parsed = DocumentSourceLookUp::createFromBson(lookupSpec.firstElement(), expCtx); + auto lookup = static_cast<DocumentSourceLookUp*>(parsed.get()); + + auto modifiedPaths = lookup->getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet); + ASSERT_EQ(1U, modifiedPaths.paths.size()); + ASSERT_EQ(1U, modifiedPaths.paths.count("foreignDocs")); +} + +TEST_F(DocumentSourceLookUpTest, LookupReportsFieldsModifiedByAbsorbedUnwind) { + auto expCtx = getExpCtx(); + NamespaceString fromNs("test", "foreign"); + expCtx->resolvedNamespaces[fromNs.coll()] = {fromNs, std::vector<BSONObj>{}}; + + // Set up the $lookup stage. + auto lookupSpec = Document{{"$lookup", + Document{{"from", fromNs.coll()}, + {"localField", "foreignId"}, + {"foreignField", "_id"}, + {"as", "foreignDoc"}}}} + .toBson(); + auto parsed = DocumentSourceLookUp::createFromBson(lookupSpec.firstElement(), expCtx); + auto lookup = static_cast<DocumentSourceLookUp*>(parsed.get()); + + const bool preserveNullAndEmptyArrays = false; + const boost::optional<std::string> includeArrayIndex = std::string("arrIndex"); + lookup->setUnwindStage(DocumentSourceUnwind::create( + expCtx, "foreignDoc", preserveNullAndEmptyArrays, includeArrayIndex)); + + auto modifiedPaths = lookup->getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet); + ASSERT_EQ(2U, modifiedPaths.paths.size()); + ASSERT_EQ(1U, modifiedPaths.paths.count("foreignDoc")); + ASSERT_EQ(1U, modifiedPaths.paths.count("arrIndex")); +} + } // namespace } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp index 8476c82a67d..d8df9116145 100644 --- a/src/mongo/db/pipeline/document_source_match.cpp +++ b/src/mongo/db/pipeline/document_source_match.cpp @@ -87,7 +87,7 @@ DocumentSource::GetNextResult DocumentSourceMatch::getNext() { return nextInput; } -Pipeline::SourceContainer::iterator DocumentSourceMatch::optimizeAt( +Pipeline::SourceContainer::iterator DocumentSourceMatch::doOptimizeAt( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { invariant(*itr == this); @@ -364,7 +364,7 @@ void DocumentSourceMatch::joinMatchWith(intrusive_ptr<DocumentSourceMatch> other _expression = std::move(status.getValue()); } -pair<intrusive_ptr<DocumentSource>, intrusive_ptr<DocumentSource>> +pair<intrusive_ptr<DocumentSourceMatch>, intrusive_ptr<DocumentSourceMatch>> DocumentSourceMatch::splitSourceBy(const std::set<std::string>& fields) { pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> newExpr( expression::splitMatchExpressionBy(std::move(_expression), fields)); @@ -390,15 +390,12 @@ DocumentSourceMatch::splitSourceBy(const std::set<std::string>& fields) { BSONObjBuilder firstBob; newExpr.first->serialize(&firstBob); - intrusive_ptr<DocumentSource> firstMatch(new DocumentSourceMatch(firstBob.obj(), pExpCtx)); - // This $match stage is still needed, so update the MatchExpression as needed. BSONObjBuilder secondBob; newExpr.second->serialize(&secondBob); - intrusive_ptr<DocumentSource> secondMatch(new DocumentSourceMatch(secondBob.obj(), pExpCtx)); - - return {firstMatch, secondMatch}; + return {DocumentSourceMatch::create(firstBob.obj(), pExpCtx), + DocumentSourceMatch::create(secondBob.obj(), pExpCtx)}; } boost::intrusive_ptr<DocumentSourceMatch> DocumentSourceMatch::descendMatchOnPath( diff --git a/src/mongo/db/pipeline/document_source_project_test.cpp b/src/mongo/db/pipeline/document_source_project_test.cpp index de2bd12e254..6dca42a98f3 100644 --- a/src/mongo/db/pipeline/document_source_project_test.cpp +++ b/src/mongo/db/pipeline/document_source_project_test.cpp @@ -191,5 +191,55 @@ TEST_F(ProjectStageTest, ExclusionShouldNotAddDependencies) { ASSERT_EQUALS(false, dependencies.getNeedTextScore()); } +TEST_F(ProjectStageTest, InclusionProjectionReportsIncludedPathsFromGetModifiedPaths) { + auto project = DocumentSourceProject::create( + fromjson("{a: true, 'b.c': {d: true}, e: {f: {g: true}}, h: {i: {$literal: true}}}"), + getExpCtx()); + + auto modifiedPaths = project->getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kAllExcept); + ASSERT_EQUALS(4U, modifiedPaths.paths.size()); + ASSERT_EQUALS(1U, modifiedPaths.paths.count("_id")); + ASSERT_EQUALS(1U, modifiedPaths.paths.count("a")); + ASSERT_EQUALS(1U, modifiedPaths.paths.count("b.c.d")); + ASSERT_EQUALS(1U, modifiedPaths.paths.count("e.f.g")); +} + +TEST_F(ProjectStageTest, InclusionProjectionReportsIncludedPathsButExcludesId) { + auto project = DocumentSourceProject::create( + fromjson("{_id: false, 'b.c': {d: true}, e: {f: {g: true}}, h: {i: {$literal: true}}}"), + getExpCtx()); + + auto modifiedPaths = project->getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kAllExcept); + ASSERT_EQUALS(2U, modifiedPaths.paths.size()); + ASSERT_EQUALS(1U, modifiedPaths.paths.count("b.c.d")); + ASSERT_EQUALS(1U, modifiedPaths.paths.count("e.f.g")); +} + +TEST_F(ProjectStageTest, ExclusionProjectionReportsExcludedPathsAsModifiedPaths) { + auto project = DocumentSourceProject::create( + fromjson("{a: false, 'b.c': {d: false}, e: {f: {g: false}}}"), getExpCtx()); + + auto modifiedPaths = project->getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet); + ASSERT_EQUALS(3U, modifiedPaths.paths.size()); + ASSERT_EQUALS(1U, modifiedPaths.paths.count("a")); + ASSERT_EQUALS(1U, modifiedPaths.paths.count("b.c.d")); + ASSERT_EQUALS(1U, modifiedPaths.paths.count("e.f.g")); +} + +TEST_F(ProjectStageTest, ExclusionProjectionReportsExcludedPathsWithIdExclusion) { + auto project = DocumentSourceProject::create( + fromjson("{_id: false, 'b.c': {d: false}, e: {f: {g: false}}}"), getExpCtx()); + + auto modifiedPaths = project->getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet); + ASSERT_EQUALS(3U, modifiedPaths.paths.size()); + ASSERT_EQUALS(1U, modifiedPaths.paths.count("_id")); + ASSERT_EQUALS(1U, modifiedPaths.paths.count("b.c.d")); + ASSERT_EQUALS(1U, modifiedPaths.paths.count("e.f.g")); +} + } // namespace } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_redact.cpp b/src/mongo/db/pipeline/document_source_redact.cpp index f7570c8c733..e5d8932b0b2 100644 --- a/src/mongo/db/pipeline/document_source_redact.cpp +++ b/src/mongo/db/pipeline/document_source_redact.cpp @@ -72,7 +72,7 @@ DocumentSource::GetNextResult DocumentSourceRedact::getNext() { return nextInput; } -Pipeline::SourceContainer::iterator DocumentSourceRedact::optimizeAt( +Pipeline::SourceContainer::iterator DocumentSourceRedact::doOptimizeAt( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { invariant(*itr == this); diff --git a/src/mongo/db/pipeline/document_source_replace_root.cpp b/src/mongo/db/pipeline/document_source_replace_root.cpp index 0c1c1a38e8c..5625e43b1ab 100644 --- a/src/mongo/db/pipeline/document_source_replace_root.cpp +++ b/src/mongo/db/pipeline/document_source_replace_root.cpp @@ -51,7 +51,7 @@ class ReplaceRootTransformation final public: ReplaceRootTransformation() {} - Document applyTransformation(Document input) { + Document applyTransformation(Document input) final { // Extract subdocument in the form of a Value. _variables->setRoot(input); Value newRoot = _newRoot->evaluate(_variables.get()); @@ -81,25 +81,30 @@ public: } // Optimize the newRoot expression. - void optimize() { + void optimize() final { _newRoot->optimize(); } - Document serialize(bool explain) const { + Document serialize(bool explain) const final { return Document{{"newRoot", _newRoot->serialize(explain)}}; } - DocumentSource::GetDepsReturn addDependencies(DepsTracker* deps) const { + DocumentSource::GetDepsReturn addDependencies(DepsTracker* deps) const final { _newRoot->addDependencies(deps); // This stage will replace the entire document with a new document, so any existing fields // will be replaced and cannot be required as dependencies. return DocumentSource::EXHAUSTIVE_FIELDS; } - void injectExpressionContext(const boost::intrusive_ptr<ExpressionContext>& pExpCtx) { + void injectExpressionContext(const boost::intrusive_ptr<ExpressionContext>& pExpCtx) final { _newRoot->injectExpressionContext(pExpCtx); } + DocumentSource::GetModPathsReturn getModifiedPaths() const final { + // Replaces the entire root, so all paths are modified. + return {DocumentSource::GetModPathsReturn::Type::kAllPaths, std::set<std::string>{}}; + } + // Create the replaceRoot transformer. Uasserts on invalid input. static std::unique_ptr<ReplaceRootTransformation> create(const BSONElement& spec) { diff --git a/src/mongo/db/pipeline/document_source_replace_root_test.cpp b/src/mongo/db/pipeline/document_source_replace_root_test.cpp index 0e01d4ac7be..16e6c2f5fa0 100644 --- a/src/mongo/db/pipeline/document_source_replace_root_test.cpp +++ b/src/mongo/db/pipeline/document_source_replace_root_test.cpp @@ -270,6 +270,14 @@ TEST_F(ReplaceRootBasics, OnlyDependentFieldIsNewRoot) { ASSERT_EQUALS(false, dependencies.getNeedTextScore()); } +TEST_F(ReplaceRootBasics, ReplaceRootModifiesAllFields) { + auto replaceRoot = createReplaceRoot(BSON("newRoot" + << "$a")); + auto modifiedPaths = replaceRoot->getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kAllPaths); + ASSERT_EQUALS(0U, modifiedPaths.paths.size()); +} + /** * Fixture to test error cases of initializing the $replaceRoot stage. */ diff --git a/src/mongo/db/pipeline/document_source_single_document_transformation.cpp b/src/mongo/db/pipeline/document_source_single_document_transformation.cpp index 222c3ebebdf..953bcc81e53 100644 --- a/src/mongo/db/pipeline/document_source_single_document_transformation.cpp +++ b/src/mongo/db/pipeline/document_source_single_document_transformation.cpp @@ -76,7 +76,7 @@ Value DocumentSourceSingleDocumentTransformation::serialize(bool explain) const return Value(Document{{getSourceName(), _parsedTransform->serialize(explain)}}); } -Pipeline::SourceContainer::iterator DocumentSourceSingleDocumentTransformation::optimizeAt( +Pipeline::SourceContainer::iterator DocumentSourceSingleDocumentTransformation::doOptimizeAt( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { invariant(*itr == this); auto nextSkip = dynamic_cast<DocumentSourceSkip*>((*std::next(itr)).get()); @@ -100,4 +100,9 @@ void DocumentSourceSingleDocumentTransformation::doInjectExpressionContext() { _parsedTransform->injectExpressionContext(pExpCtx); } +DocumentSource::GetModPathsReturn DocumentSourceSingleDocumentTransformation::getModifiedPaths() + const { + return _parsedTransform->getModifiedPaths(); +} + } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_skip.cpp b/src/mongo/db/pipeline/document_source_skip.cpp index 137b9ac9483..e052c24895d 100644 --- a/src/mongo/db/pipeline/document_source_skip.cpp +++ b/src/mongo/db/pipeline/document_source_skip.cpp @@ -71,7 +71,7 @@ intrusive_ptr<DocumentSource> DocumentSourceSkip::optimize() { return _nToSkip == 0 ? nullptr : this; } -Pipeline::SourceContainer::iterator DocumentSourceSkip::optimizeAt( +Pipeline::SourceContainer::iterator DocumentSourceSkip::doOptimizeAt( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { invariant(*itr == this); diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp index e25867f476b..156b0499d20 100644 --- a/src/mongo/db/pipeline/document_source_sort.cpp +++ b/src/mongo/db/pipeline/document_source_sort.cpp @@ -138,11 +138,10 @@ Document DocumentSourceSort::serializeSortKey(bool explain) const { return keyObj.freeze(); } -Pipeline::SourceContainer::iterator DocumentSourceSort::optimizeAt( +Pipeline::SourceContainer::iterator DocumentSourceSort::doOptimizeAt( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { invariant(*itr == this); - auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get()); auto nextLimit = dynamic_cast<DocumentSourceLimit*>((*std::next(itr)).get()); if (nextLimit) { @@ -150,11 +149,6 @@ Pipeline::SourceContainer::iterator DocumentSourceSort::optimizeAt( setLimitSrc(nextLimit); container->erase(std::next(itr)); return itr; - } else if (nextMatch && !nextMatch->isTextQuery()) { - // Swap the $match before the $sort, thus reducing the number of documents that pass into - // this stage. - std::swap(*itr, *std::next(itr)); - return itr == container->begin() ? itr : std::prev(itr); } return std::next(itr); } diff --git a/src/mongo/db/pipeline/document_source_sort_test.cpp b/src/mongo/db/pipeline/document_source_sort_test.cpp index 04864c3cb2d..022b2040ba3 100644 --- a/src/mongo/db/pipeline/document_source_sort_test.cpp +++ b/src/mongo/db/pipeline/document_source_sort_test.cpp @@ -184,6 +184,13 @@ TEST_F(DocumentSourceSortTest, OutputSort) { ASSERT_EQUALS(outputSort.size(), 2U); } +TEST_F(DocumentSourceSortTest, ReportsNoPathsModified) { + createSort(BSON("a" << 1 << "b.c" << -1)); + auto modifiedPaths = sort()->getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet); + ASSERT_EQUALS(0U, modifiedPaths.paths.size()); +} + class DocumentSourceSortExecutionTest : public DocumentSourceSortTest { public: void checkResults(deque<DocumentSource::GetNextResult> inputDocs, diff --git a/src/mongo/db/pipeline/document_source_unwind.cpp b/src/mongo/db/pipeline/document_source_unwind.cpp index a03bd1e8034..02b8dc62355 100644 --- a/src/mongo/db/pipeline/document_source_unwind.cpp +++ b/src/mongo/db/pipeline/document_source_unwind.cpp @@ -232,43 +232,12 @@ BSONObjSet DocumentSourceUnwind::getOutputSorts() { return out; } -Pipeline::SourceContainer::iterator DocumentSourceUnwind::optimizeAt( - Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { - invariant(*itr == this); - - if (auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get())) { - std::set<std::string> fields = {_unwindPath.fullPath()}; - - if (_indexPath) { - fields.insert((*_indexPath).fullPath()); - } - - auto splitMatch = nextMatch->splitSourceBy(fields); - - invariant(splitMatch.first || splitMatch.second); - - if (!splitMatch.first && splitMatch.second) { - // No optimization was possible. - return std::next(itr); - } - - container->erase(std::next(itr)); - - // If splitMatch.second is not null, then there is a new $match stage to insert after - // ourselves. - if (splitMatch.second) { - container->insert(std::next(itr), std::move(splitMatch.second)); - } - - if (splitMatch.first) { - container->insert(itr, std::move(splitMatch.first)); - if (std::prev(itr) == container->begin()) { - return std::prev(itr); - } - return std::prev(std::prev(itr)); - } +DocumentSource::GetModPathsReturn DocumentSourceUnwind::getModifiedPaths() const { + std::set<std::string> modifiedFields{_unwindPath.fullPath()}; + if (_indexPath) { + modifiedFields.insert(_indexPath->fullPath()); } - return std::next(itr); + return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedFields)}; } Value DocumentSourceUnwind::serialize(bool explain) const { diff --git a/src/mongo/db/pipeline/document_source_unwind_test.cpp b/src/mongo/db/pipeline/document_source_unwind_test.cpp index 56ad90dca9a..a782d7b521f 100644 --- a/src/mongo/db/pipeline/document_source_unwind_test.cpp +++ b/src/mongo/db/pipeline/document_source_unwind_test.cpp @@ -717,6 +717,31 @@ TEST_F(UnwindStageTest, ShouldPropagatePauses) { ASSERT_TRUE(unwind->getNext().isEOF()); } +TEST_F(UnwindStageTest, UnwindOnlyModifiesUnwoundPathWhenNotIncludingIndex) { + const bool includeNullIfEmptyOrMissing = false; + const boost::optional<std::string> includeArrayIndex = boost::none; + auto unwind = DocumentSourceUnwind::create( + getExpCtx(), "array", includeNullIfEmptyOrMissing, includeArrayIndex); + + auto modifiedPaths = unwind->getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet); + ASSERT_EQUALS(1U, modifiedPaths.paths.size()); + ASSERT_EQUALS(1U, modifiedPaths.paths.count("array")); +} + +TEST_F(UnwindStageTest, UnwindIncludesIndexPathWhenIncludingIndex) { + const bool includeNullIfEmptyOrMissing = false; + const boost::optional<std::string> includeArrayIndex = std::string("arrIndex"); + auto unwind = DocumentSourceUnwind::create( + getExpCtx(), "array", includeNullIfEmptyOrMissing, includeArrayIndex); + + auto modifiedPaths = unwind->getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet); + ASSERT_EQUALS(2U, modifiedPaths.paths.size()); + ASSERT_EQUALS(1U, modifiedPaths.paths.count("array")); + ASSERT_EQUALS(1U, modifiedPaths.paths.count("arrIndex")); +} + // // Error cases. // diff --git a/src/mongo/db/pipeline/parsed_add_fields.h b/src/mongo/db/pipeline/parsed_add_fields.h index 240b17faead..58b8445e727 100644 --- a/src/mongo/db/pipeline/parsed_add_fields.h +++ b/src/mongo/db/pipeline/parsed_add_fields.h @@ -96,6 +96,11 @@ public: return DocumentSource::SEE_NEXT; } + DocumentSource::GetModPathsReturn getModifiedPaths() const final { + // TODO SERVER-25510 Only report added paths as modified. + return {DocumentSource::GetModPathsReturn::Type::kAllPaths, std::set<std::string>{}}; + } + /** * Add the specified fields to 'inputDoc'. * diff --git a/src/mongo/db/pipeline/parsed_exclusion_projection.cpp b/src/mongo/db/pipeline/parsed_exclusion_projection.cpp index 198a85e7a69..8226d146009 100644 --- a/src/mongo/db/pipeline/parsed_exclusion_projection.cpp +++ b/src/mongo/db/pipeline/parsed_exclusion_projection.cpp @@ -121,6 +121,16 @@ Value ExclusionNode::applyProjectionToValue(Value val) const { } } +void ExclusionNode::addModifiedPaths(std::set<std::string>* modifiedPaths) const { + for (auto&& excludedField : _excludedFields) { + modifiedPaths->insert(FieldPath::getFullyQualifiedPath(_pathToNode, excludedField)); + } + + for (auto&& childPair : _children) { + childPair.second->addModifiedPaths(modifiedPaths); + } +} + // // ParsedExclusionProjection. // diff --git a/src/mongo/db/pipeline/parsed_exclusion_projection.h b/src/mongo/db/pipeline/parsed_exclusion_projection.h index 18fb7dc1a60..ea7b25ac33f 100644 --- a/src/mongo/db/pipeline/parsed_exclusion_projection.h +++ b/src/mongo/db/pipeline/parsed_exclusion_projection.h @@ -71,6 +71,7 @@ public: */ ExclusionNode* addOrGetChild(FieldPath field); + void addModifiedPaths(std::set<std::string>* modifiedPaths) const; private: // Helpers for addOrGetChild above. @@ -116,10 +117,16 @@ public: */ Document applyProjection(Document inputDoc) const final; - DocumentSource::GetDepsReturn addDependencies(DepsTracker* deps) const { + DocumentSource::GetDepsReturn addDependencies(DepsTracker* deps) const final { return DocumentSource::SEE_NEXT; } + DocumentSource::GetModPathsReturn getModifiedPaths() const final { + std::set<std::string> modifiedPaths; + _root->addModifiedPaths(&modifiedPaths); + return {DocumentSource::GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths)}; + } + private: /** * Helper for parse() above. diff --git a/src/mongo/db/pipeline/parsed_exclusion_projection_test.cpp b/src/mongo/db/pipeline/parsed_exclusion_projection_test.cpp index a9fb218d8ef..3673330dbfb 100644 --- a/src/mongo/db/pipeline/parsed_exclusion_projection_test.cpp +++ b/src/mongo/db/pipeline/parsed_exclusion_projection_test.cpp @@ -117,6 +117,29 @@ TEST(ExclusionProjection, ShouldNotAddAnyDependencies) { ASSERT_FALSE(deps.getNeedTextScore()); } +TEST(ExclusionProjection, ShouldReportExcludedFieldsAsModified) { + ParsedExclusionProjection exclusion; + exclusion.parse(BSON("_id" << false << "a" << false << "b.c" << false)); + + auto modifiedPaths = exclusion.getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet); + ASSERT_EQ(modifiedPaths.paths.count("_id"), 1UL); + ASSERT_EQ(modifiedPaths.paths.count("a"), 1UL); + ASSERT_EQ(modifiedPaths.paths.count("b.c"), 1UL); + ASSERT_EQ(modifiedPaths.paths.size(), 3UL); +} + +TEST(ExclusionProjection, ShouldReportExcludedFieldsAsModifiedWhenSpecifiedAsNestedObj) { + ParsedExclusionProjection exclusion; + exclusion.parse(BSON("a" << BSON("b" << false << "c" << BSON("d" << false)))); + + auto modifiedPaths = exclusion.getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet); + ASSERT_EQ(modifiedPaths.paths.count("a.b"), 1UL); + ASSERT_EQ(modifiedPaths.paths.count("a.c.d"), 1UL); + ASSERT_EQ(modifiedPaths.paths.size(), 2UL); +} + // // Tests of execution of exclusions at the top level. // diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection.cpp b/src/mongo/db/pipeline/parsed_inclusion_projection.cpp index 4842500cd90..77fccf8107f 100644 --- a/src/mongo/db/pipeline/parsed_inclusion_projection.cpp +++ b/src/mongo/db/pipeline/parsed_inclusion_projection.cpp @@ -236,6 +236,18 @@ InclusionNode* InclusionNode::addChild(string field) { return insertedPair.first->second.get(); } +void InclusionNode::addPreservedPaths(std::set<std::string>* preservedPaths) const { + // Only our inclusion paths are preserved. This inclusion node may also have paths with + // associated expressions, but those paths are modified and therefore are not considered + // "preserved". + for (auto&& includedField : _inclusions) { + preservedPaths->insert(FieldPath::getFullyQualifiedPath(_pathToNode, includedField)); + } + for (auto&& childPair : _children) { + childPair.second->addPreservedPaths(preservedPaths); + } +} + // // ParsedInclusionProjection // diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection.h b/src/mongo/db/pipeline/parsed_inclusion_projection.h index eb2bc1877bc..8cd04514b53 100644 --- a/src/mongo/db/pipeline/parsed_inclusion_projection.h +++ b/src/mongo/db/pipeline/parsed_inclusion_projection.h @@ -121,6 +121,11 @@ public: void injectExpressionContext(const boost::intrusive_ptr<ExpressionContext>& expCtx); + /** + * Recursively add all paths that are preserved by this inclusion projection. + */ + void addPreservedPaths(std::set<std::string>* preservedPaths) const; + private: // Helpers for the Document versions above. These will apply the transformation recursively to // each element of any arrays, and ensure non-documents are handled appropriately. @@ -214,6 +219,12 @@ public: return DocumentSource::EXHAUSTIVE_FIELDS; } + DocumentSource::GetModPathsReturn getModifiedPaths() const final { + std::set<std::string> preservedPaths; + _root->addPreservedPaths(&preservedPaths); + return {DocumentSource::GetModPathsReturn::Type::kAllExcept, std::move(preservedPaths)}; + } + /** * Apply this exclusion projection to 'inputDoc'. * diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp b/src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp index b2833638543..56da137715e 100644 --- a/src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp +++ b/src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp @@ -174,6 +174,48 @@ TEST(InclusionProjection, ShouldOptimizeNestedExpressions) { ASSERT_DOCUMENT_EQ(expectedSerialization, inclusion.serialize(true)); } +TEST(InclusionProjection, ShouldReportThatAllExceptIncludedFieldsAreModified) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON( + "a" << wrapInLiteral("computedVal") << "b.c" << wrapInLiteral("computedVal") << "d" << true + << "e.f" + << true)); + + auto modifiedPaths = inclusion.getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kAllExcept); + // Included paths are not modified. + ASSERT_EQ(modifiedPaths.paths.count("_id"), 1UL); + ASSERT_EQ(modifiedPaths.paths.count("d"), 1UL); + ASSERT_EQ(modifiedPaths.paths.count("e.f"), 1UL); + // Computed paths are modified. + ASSERT_EQ(modifiedPaths.paths.count("a"), 0UL); + ASSERT_EQ(modifiedPaths.paths.count("b.c"), 0UL); + ASSERT_EQ(modifiedPaths.paths.size(), 3UL); +} + +TEST(InclusionProjection, ShouldReportThatAllExceptIncludedFieldsAreModifiedWithIdExclusion) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("_id" << false << "a" << wrapInLiteral("computedVal") << "b.c" + << wrapInLiteral("computedVal") + << "d" + << true + << "e.f" + << true)); + + auto modifiedPaths = inclusion.getModifiedPaths(); + ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kAllExcept); + // Included paths are not modified. + ASSERT_EQ(modifiedPaths.paths.count("d"), 1UL); + ASSERT_EQ(modifiedPaths.paths.count("e.f"), 1UL); + // Computed paths are modified. + ASSERT_EQ(modifiedPaths.paths.count("a"), 0UL); + ASSERT_EQ(modifiedPaths.paths.count("b.c"), 0UL); + // _id is explicitly excluded. + ASSERT_EQ(modifiedPaths.paths.count("_id"), 0UL); + + ASSERT_EQ(modifiedPaths.paths.size(), 2UL); +} + // // Top-level only. // diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp index 16625527383..315b0b10cfd 100644 --- a/src/mongo/db/pipeline/pipeline_test.cpp +++ b/src/mongo/db/pipeline/pipeline_test.cpp @@ -338,6 +338,7 @@ class LookupShouldSplitMatch : public Base { " {$match: {asField: {$eq: 3}}}]"; } }; + class LookupShouldNotAbsorbMatchOnAs : public Base { string inputPipeJson() { return "[{$lookup: {from: 'foo', as: 'asField', localField: 'y', foreignField: 'z'}}, " @@ -714,6 +715,134 @@ class GraphLookupShouldNotCoalesceWithUnwindNotOnAs : public Base { } }; +class GraphLookupShouldSwapWithMatch : public Base { + string inputPipeJson() { + return "[{$graphLookup: {" + " from: 'coll2'," + " as: 'results'," + " connectToField: 'to'," + " connectFromField: 'from'," + " startWith: '$startVal'" + " }}," + " {$match: {independent: 'x'}}" + "]"; + } + string outputPipeJson() { + return "[{$match: {independent: 'x'}}," + " {$graphLookup: {" + " from: 'coll2'," + " as: 'results'," + " connectToField: 'to'," + " connectFromField: 'from'," + " startWith: '$startVal'" + " }}]"; + } +}; + +class ExclusionProjectShouldSwapWithIndependentMatch : public Base { + string inputPipeJson() final { + return "[{$project: {redacted: 0}}, {$match: {unrelated: 4}}]"; + } + string outputPipeJson() final { + return "[{$match: {unrelated: 4}}, {$project: {redacted: false}}]"; + } +}; + +class ExclusionProjectShouldNotSwapWithMatchOnExcludedFields : public Base { + string inputPipeJson() final { + return "[{$project: {subdoc: {redacted: false}}}, {$match: {'subdoc.redacted': 4}}]"; + } + string outputPipeJson() final { + return inputPipeJson(); + } +}; + +class MatchShouldSplitIfPartIsIndependentOfExclusionProjection : public Base { + string inputPipeJson() final { + return "[{$project: {redacted: 0}}," + " {$match: {redacted: 'x', unrelated: 4}}]"; + } + string outputPipeJson() final { + return "[{$match: {unrelated: {$eq: 4}}}," + " {$project: {redacted: false}}," + " {$match: {redacted: {$eq: 'x'}}}]"; + } +}; + +class InclusionProjectShouldSwapWithIndependentMatch : public Base { + string inputPipeJson() final { + return "[{$project: {included: 1}}, {$match: {included: 4}}]"; + } + string outputPipeJson() final { + return "[{$match: {included: 4}}, {$project: {_id: true, included: true}}]"; + } +}; + +class InclusionProjectShouldNotSwapWithMatchOnFieldsNotIncluded : public Base { + string inputPipeJson() final { + return "[{$project: {_id: true, included: true, subdoc: {included: true}}}," + " {$match: {notIncluded: 'x', unrelated: 4}}]"; + } + string outputPipeJson() final { + return inputPipeJson(); + } +}; + +class MatchShouldSplitIfPartIsIndependentOfInclusionProjection : public Base { + string inputPipeJson() final { + return "[{$project: {_id: true, included: true}}," + " {$match: {included: 'x', unrelated: 4}}]"; + } + string outputPipeJson() final { + return "[{$match: {included: {$eq: 'x'}}}," + " {$project: {_id: true, included: true}}," + " {$match: {unrelated: {$eq: 4}}}]"; + } +}; + +class TwoMatchStagesShouldBothPushIndependentPartsBeforeProjection : public Base { + string inputPipeJson() final { + return "[{$project: {_id: true, included: true}}," + " {$match: {included: 'x', unrelated: 4}}," + " {$match: {included: 'y', unrelated: 5}}]"; + } + string outputPipeJson() final { + return "[{$match: {$and: [{included: {$eq: 'x'}}, {included: {$eq: 'y'}}]}}," + " {$project: {_id: true, included: true}}," + " {$match: {$and: [{unrelated: {$eq: 4}}, {unrelated: {$eq: 5}}]}}]"; + } +}; + +class NeighboringMatchesShouldCoalesce : public Base { + string inputPipeJson() final { + return "[{$match: {x: 'x'}}," + " {$match: {y: 'y'}}]"; + } + string outputPipeJson() final { + return "[{$match: {$and: [{x: 'x'}, {y: 'y'}]}}]"; + } +}; + +class MatchShouldNotSwapBeforeLimit : public Base { + string inputPipeJson() final { + return "[{$limit: 3}," + " {$match: {y: 'y'}}]"; + } + string outputPipeJson() final { + return inputPipeJson(); + } +}; + +class MatchShouldNotSwapBeforeSkip : public Base { + string inputPipeJson() final { + return "[{$skip: 3}," + " {$match: {y: 'y'}}]"; + } + string outputPipeJson() final { + return inputPipeJson(); + } +}; + } // namespace Local namespace Sharded { @@ -1324,6 +1453,7 @@ public: add<Optimizations::Local::GraphLookupShouldCoalesceWithUnwindOnAsWithPreserveEmpty>(); add<Optimizations::Local::GraphLookupShouldCoalesceWithUnwindOnAsWithIncludeArrayIndex>(); add<Optimizations::Local::GraphLookupShouldNotCoalesceWithUnwindNotOnAs>(); + add<Optimizations::Local::GraphLookupShouldSwapWithMatch>(); add<Optimizations::Local::MatchShouldDuplicateItselfBeforeRedact>(); add<Optimizations::Local::MatchShouldSwapWithUnwind>(); add<Optimizations::Local::MatchShouldNotOptimizeWhenMatchingOnIndexField>(); @@ -1333,6 +1463,16 @@ public: add<Optimizations::Local::MatchWithOrDoesNotSplit>(); add<Optimizations::Local::MatchShouldSplitOnUnwind>(); add<Optimizations::Local::UnwindBeforeDoubleMatchShouldRepeatedlyOptimize>(); + add<Optimizations::Local::ExclusionProjectShouldSwapWithIndependentMatch>(); + add<Optimizations::Local::ExclusionProjectShouldNotSwapWithMatchOnExcludedFields>(); + add<Optimizations::Local::MatchShouldSplitIfPartIsIndependentOfExclusionProjection>(); + add<Optimizations::Local::InclusionProjectShouldSwapWithIndependentMatch>(); + add<Optimizations::Local::InclusionProjectShouldNotSwapWithMatchOnFieldsNotIncluded>(); + add<Optimizations::Local::MatchShouldSplitIfPartIsIndependentOfInclusionProjection>(); + add<Optimizations::Local::TwoMatchStagesShouldBothPushIndependentPartsBeforeProjection>(); + add<Optimizations::Local::NeighboringMatchesShouldCoalesce>(); + add<Optimizations::Local::MatchShouldNotSwapBeforeLimit>(); + add<Optimizations::Local::MatchShouldNotSwapBeforeSkip>(); add<Optimizations::Sharded::Empty>(); add<Optimizations::Sharded::coalesceLookUpAndUnwind::ShouldCoalesceUnwindOnAs>(); add<Optimizations::Sharded::coalesceLookUpAndUnwind:: |