diff options
Diffstat (limited to 'src/mongo/db/pipeline')
-rw-r--r-- | src/mongo/db/pipeline/document_source.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source.h | 20 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_graph_lookup.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_lookup.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_match.cpp | 44 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_match.h | 7 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_replace_root.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_sort.h | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_unwind.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression.h | 4 | ||||
-rw-r--r-- | src/mongo/db/pipeline/parsed_add_fields.h | 7 | ||||
-rw-r--r-- | src/mongo/db/pipeline/parsed_exclusion_projection.h | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/parsed_inclusion_projection.cpp | 25 | ||||
-rw-r--r-- | src/mongo/db/pipeline/parsed_inclusion_projection.h | 19 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_test.cpp | 79 |
15 files changed, 191 insertions, 35 deletions
diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp index 23e98350197..9651c7cb8e8 100644 --- a/src/mongo/db/pipeline/document_source.cpp +++ b/src/mongo/db/pipeline/document_source.cpp @@ -147,10 +147,15 @@ splitMatchByModifiedFields(const boost::intrusive_ptr<DocumentSourceMatch>& matc case DocumentSource::GetModPathsReturn::Type::kAllExcept: { DepsTracker depsTracker; match->getDependencies(&depsTracker); - modifiedPaths = extractModifiedDependencies(depsTracker.fields, modifiedPathsRet.paths); + + auto preservedPaths = modifiedPathsRet.paths; + for (auto&& rename : modifiedPathsRet.renames) { + preservedPaths.insert(rename.first); + } + modifiedPaths = extractModifiedDependencies(depsTracker.fields, preservedPaths); } } - return match->splitSourceBy(modifiedPaths); + return match->splitSourceBy(modifiedPaths, modifiedPathsRet.renames); } } // namespace diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h index 2b014c37a61..7b724b555cd 100644 --- a/src/mongo/db/pipeline/document_source.h +++ b/src/mongo/db/pipeline/document_source.h @@ -366,11 +366,25 @@ public: kAllExcept, }; - GetModPathsReturn(Type type, std::set<std::string>&& paths) - : type(type), paths(std::move(paths)) {} + GetModPathsReturn(Type type, + std::set<std::string>&& paths, + StringMap<std::string>&& renames) + : type(type), paths(std::move(paths)), renames(std::move(renames)) {} Type type; std::set<std::string> paths; + + // Stages may fill out 'renames' to contain information about path renames. Each entry in + // 'renames' maps from the new name of the path (valid in documents flowing *out* of this + // stage) to the old name of the path (valid in documents flowing *into* this stage). + // + // For example, consider the stage + // + // {$project: {_id: 0, a: 1, b: "$c"}} + // + // This stage should return kAllExcept, since it modifies all paths other than "a". It can + // also fill out 'renames' with the mapping "b" => "c". + StringMap<std::string> renames; }; /** @@ -381,7 +395,7 @@ public: * See GetModPathsReturn above for the possible return values and what they mean. */ virtual GetModPathsReturn getModifiedPaths() const { - return {GetModPathsReturn::Type::kNotSupported, std::set<std::string>{}}; + return {GetModPathsReturn::Type::kNotSupported, std::set<std::string>{}, {}}; } /** diff --git a/src/mongo/db/pipeline/document_source_graph_lookup.cpp b/src/mongo/db/pipeline/document_source_graph_lookup.cpp index 717b82d59aa..7cc78ba8e95 100644 --- a/src/mongo/db/pipeline/document_source_graph_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_graph_lookup.cpp @@ -351,7 +351,7 @@ DocumentSource::GetModPathsReturn DocumentSourceGraphLookUp::getModifiedPaths() modifiedPaths.insert(pathsModifiedByUnwind.paths.begin(), pathsModifiedByUnwind.paths.end()); } - return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths)}; + return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths), {}}; } Pipeline::SourceContainer::iterator DocumentSourceGraphLookUp::doOptimizeAt( diff --git a/src/mongo/db/pipeline/document_source_lookup.cpp b/src/mongo/db/pipeline/document_source_lookup.cpp index b6cf88172c3..4c68db3f1da 100644 --- a/src/mongo/db/pipeline/document_source_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_lookup.cpp @@ -174,7 +174,7 @@ DocumentSource::GetModPathsReturn DocumentSourceLookUp::getModifiedPaths() const modifiedPaths.insert(pathsModifiedByUnwind.paths.begin(), pathsModifiedByUnwind.paths.end()); } - return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths)}; + return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths), {}}; } Pipeline::SourceContainer::iterator DocumentSourceLookUp::doOptimizeAt( diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp index 0f611b04423..1b7d19f2d2f 100644 --- a/src/mongo/db/pipeline/document_source_match.cpp +++ b/src/mongo/db/pipeline/document_source_match.cpp @@ -371,37 +371,49 @@ void DocumentSourceMatch::joinMatchWith(intrusive_ptr<DocumentSourceMatch> other } pair<intrusive_ptr<DocumentSourceMatch>, intrusive_ptr<DocumentSourceMatch>> -DocumentSourceMatch::splitSourceBy(const std::set<std::string>& fields) { +DocumentSourceMatch::splitSourceBy(const std::set<std::string>& fields, + const StringMap<std::string>& renames) { pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> newExpr( - expression::splitMatchExpressionBy(std::move(_expression), fields)); + expression::splitMatchExpressionBy(std::move(_expression), fields, renames)); invariant(newExpr.first || newExpr.second); if (!newExpr.first) { - // The entire $match depends on 'fields'. + // The entire $match depends on 'fields'. It cannot be split or moved, so we return this + // stage without modification as the second stage in the pair. _expression = std::move(newExpr.second); return {nullptr, this}; - } else if (!newExpr.second) { - // This $match is entirely independent of 'fields'. + } + + if (!newExpr.second && renames.empty()) { + // This $match is entirely independent of 'fields' and there were no renames to apply. In + // this case, the current stage can swap with its predecessor without modification. We + // simply return this as the first stage in the pair. _expression = std::move(newExpr.first); return {this, nullptr}; } - // A MatchExpression requires that it is outlived by the BSONObj it is parsed from. Since the - // original BSONObj this $match was created from is no longer equivalent to either of the - // MatchExpressions we return, we instead take each of these expressions, serialize them, and - // then re-parse them, constructing new BSON that is owned by the DocumentSourceMatch. - - // Build an expression for a new $match stage. + // If we're here, then either: + // - this stage has split into two, or + // - this stage can swap with its predecessor, but potentially had renames applied. + // + // In any of these cases, we have created new expression(s). A MatchExpression requires that it + // is outlived by the BSONObj it is parsed from. But since the MatchExpressions were modified, + // the corresponding BSONObj may not exist. Therefore, we take each of these expressions, + // serialize them, and then re-parse them, constructing new BSON that is owned by the + // DocumentSourceMatch. BSONObjBuilder firstBob; newExpr.first->serialize(&firstBob); + auto firstMatch = DocumentSourceMatch::create(firstBob.obj(), pExpCtx); - // This $match stage is still needed, so update the MatchExpression as needed. - BSONObjBuilder secondBob; - newExpr.second->serialize(&secondBob); + intrusive_ptr<DocumentSourceMatch> secondMatch; + if (newExpr.second) { + BSONObjBuilder secondBob; + newExpr.second->serialize(&secondBob); + secondMatch = DocumentSourceMatch::create(secondBob.obj(), pExpCtx); + } - return {DocumentSourceMatch::create(firstBob.obj(), pExpCtx), - DocumentSourceMatch::create(secondBob.obj(), pExpCtx)}; + return {std::move(firstMatch), std::move(secondMatch)}; } boost::intrusive_ptr<DocumentSourceMatch> DocumentSourceMatch::descendMatchOnPath( diff --git a/src/mongo/db/pipeline/document_source_match.h b/src/mongo/db/pipeline/document_source_match.h index 4abd4862992..b6c41cd5161 100644 --- a/src/mongo/db/pipeline/document_source_match.h +++ b/src/mongo/db/pipeline/document_source_match.h @@ -115,9 +115,14 @@ public: * * For example, {$match: {a: "foo", "b.c": 4}} split by "b" will return pointers to two stages: * {$match: {a: "foo"}}, and {$match: {"b.c": 4}}. + * + * The 'renames' structure maps from a field to an alias that should be used in the independent + * portion of the match. For example, suppose that we split by fields "a" with the rename "b" => + * "c". The match {$match: {a: "foo", b: "bar", z: "baz"}} will split into {$match: {c: "bar", + * z: "baz"}} and {$match: {a: "foo"}}. */ std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>> - splitSourceBy(const std::set<std::string>& fields); + splitSourceBy(const std::set<std::string>& fields, const StringMap<std::string>& renames); /** * Given a document 'input', extract 'fields' and produce a BSONObj with those values. diff --git a/src/mongo/db/pipeline/document_source_replace_root.cpp b/src/mongo/db/pipeline/document_source_replace_root.cpp index 521cf748784..8f6782dc47e 100644 --- a/src/mongo/db/pipeline/document_source_replace_root.cpp +++ b/src/mongo/db/pipeline/document_source_replace_root.cpp @@ -89,7 +89,7 @@ public: DocumentSource::GetModPathsReturn getModifiedPaths() const final { // Replaces the entire root, so all paths are modified. - return {DocumentSource::GetModPathsReturn::Type::kAllPaths, std::set<std::string>{}}; + return {DocumentSource::GetModPathsReturn::Type::kAllPaths, std::set<std::string>{}, {}}; } // Create the replaceRoot transformer. Uasserts on invalid input. diff --git a/src/mongo/db/pipeline/document_source_sort.h b/src/mongo/db/pipeline/document_source_sort.h index 83af9a4bcb6..c3b1609b641 100644 --- a/src/mongo/db/pipeline/document_source_sort.h +++ b/src/mongo/db/pipeline/document_source_sort.h @@ -49,7 +49,7 @@ public: GetModPathsReturn getModifiedPaths() const final { // A $sort does not modify any paths. - return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}}; + return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}, {}}; } bool canSwapWithMatch() const final { diff --git a/src/mongo/db/pipeline/document_source_unwind.cpp b/src/mongo/db/pipeline/document_source_unwind.cpp index 079439bd842..1c172cb17be 100644 --- a/src/mongo/db/pipeline/document_source_unwind.cpp +++ b/src/mongo/db/pipeline/document_source_unwind.cpp @@ -236,7 +236,7 @@ DocumentSource::GetModPathsReturn DocumentSourceUnwind::getModifiedPaths() const if (_indexPath) { modifiedFields.insert(_indexPath->fullPath()); } - return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedFields)}; + return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedFields), {}}; } Value DocumentSourceUnwind::serialize(boost::optional<ExplainOptions::Verbosity> explain) const { diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h index a9eb4835a62..62bf6904fc5 100644 --- a/src/mongo/db/pipeline/expression.h +++ b/src/mongo/db/pipeline/expression.h @@ -769,6 +769,10 @@ public: return _fieldPath; } + Variables::Id getVariableId() const { + return _variable; + } + private: ExpressionFieldPath(const boost::intrusive_ptr<ExpressionContext>& expCtx, const std::string& fieldPath, diff --git a/src/mongo/db/pipeline/parsed_add_fields.h b/src/mongo/db/pipeline/parsed_add_fields.h index 8cc3d24f913..f4c1b535f63 100644 --- a/src/mongo/db/pipeline/parsed_add_fields.h +++ b/src/mongo/db/pipeline/parsed_add_fields.h @@ -95,8 +95,11 @@ public: DocumentSource::GetModPathsReturn getModifiedPaths() const final { std::set<std::string> computedPaths; - _root->addComputedPaths(&computedPaths); - return {DocumentSource::GetModPathsReturn::Type::kFiniteSet, std::move(computedPaths)}; + StringMap<std::string> renamedPaths; + _root->addComputedPaths(&computedPaths, &renamedPaths); + return {DocumentSource::GetModPathsReturn::Type::kFiniteSet, + std::move(computedPaths), + std::move(renamedPaths)}; } /** diff --git a/src/mongo/db/pipeline/parsed_exclusion_projection.h b/src/mongo/db/pipeline/parsed_exclusion_projection.h index 74314d97439..8f64ac5258e 100644 --- a/src/mongo/db/pipeline/parsed_exclusion_projection.h +++ b/src/mongo/db/pipeline/parsed_exclusion_projection.h @@ -124,7 +124,7 @@ public: DocumentSource::GetModPathsReturn getModifiedPaths() const final { std::set<std::string> modifiedPaths; _root->addModifiedPaths(&modifiedPaths); - return {DocumentSource::GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths)}; + return {DocumentSource::GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths), {}}; } private: diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection.cpp b/src/mongo/db/pipeline/parsed_inclusion_projection.cpp index da6b9e72ce9..a2c80238570 100644 --- a/src/mongo/db/pipeline/parsed_inclusion_projection.cpp +++ b/src/mongo/db/pipeline/parsed_inclusion_projection.cpp @@ -239,12 +239,33 @@ void InclusionNode::addPreservedPaths(std::set<std::string>* preservedPaths) con } } -void InclusionNode::addComputedPaths(std::set<std::string>* computedPaths) const { +void InclusionNode::addComputedPaths(std::set<std::string>* computedPaths, + StringMap<std::string>* renamedPaths) const { for (auto&& computedPair : _expressions) { + auto exprFieldPath = dynamic_cast<ExpressionFieldPath*>(computedPair.second.get()); + if (exprFieldPath) { + const auto& fieldPath = exprFieldPath->getFieldPath(); + // Make sure that the first path component is CURRENT/ROOT. If this is not explicitly + // provided by the user, we expect the system to have filled it in as the first path + // component. + // + // TODO SERVER-27115: Support field paths that have multiple components. + if (exprFieldPath->getVariableId() == Variables::kRootId && + fieldPath.getPathLength() == 2u) { + // Found a renamed path. Record it and continue, since we don't want to also put + // renamed paths inside the 'computedPaths' set. + std::string oldPath = fieldPath.tail().fullPath(); + std::string newPath = + FieldPath::getFullyQualifiedPath(_pathToNode, computedPair.first); + (*renamedPaths)[std::move(newPath)] = std::move(oldPath); + continue; + } + } + computedPaths->insert(FieldPath::getFullyQualifiedPath(_pathToNode, computedPair.first)); } for (auto&& childPair : _children) { - childPair.second->addComputedPaths(computedPaths); + childPair.second->addComputedPaths(computedPaths, renamedPaths); } } diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection.h b/src/mongo/db/pipeline/parsed_inclusion_projection.h index 1c0a667b025..eb81015f8a4 100644 --- a/src/mongo/db/pipeline/parsed_inclusion_projection.h +++ b/src/mongo/db/pipeline/parsed_inclusion_projection.h @@ -126,9 +126,15 @@ public: void addPreservedPaths(std::set<std::string>* preservedPaths) const; /** - * Recursively adds all paths that are purely computed in this inclusion projection. + * Recursively adds all paths that are purely computed in this inclusion projection to + * 'computedPaths'. + * + * Computed paths that are identified as the result of a simple rename are instead filled out in + * 'renamedPaths'. Each entry in 'renamedPaths' maps from the path's new name to its old name + * prior to application of this inclusion projection. */ - void addComputedPaths(std::set<std::string>* computedPaths) const; + void addComputedPaths(std::set<std::string>* computedPaths, + StringMap<std::string>* renamedPaths) const; private: // Helpers for the Document versions above. These will apply the transformation recursively to @@ -222,7 +228,14 @@ public: DocumentSource::GetModPathsReturn getModifiedPaths() const final { std::set<std::string> preservedPaths; _root->addPreservedPaths(&preservedPaths); - return {DocumentSource::GetModPathsReturn::Type::kAllExcept, std::move(preservedPaths)}; + + std::set<std::string> computedPaths; + StringMap<std::string> renamedPaths; + _root->addComputedPaths(&computedPaths, &renamedPaths); + + return {DocumentSource::GetModPathsReturn::Type::kAllExcept, + std::move(preservedPaths), + std::move(renamedPaths)}; } /** diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp index 380a5824fd6..68dfeb35bad 100644 --- a/src/mongo/db/pipeline/pipeline_test.cpp +++ b/src/mongo/db/pipeline/pipeline_test.cpp @@ -758,6 +758,85 @@ TEST(PipelineOptimizationTest, MatchShouldNotSwapBeforeSkip) { assertPipelineOptimizesTo(pipeline, pipeline); } +TEST(PipelineOptimizationTest, MatchShouldMoveAcrossProjectRename) { + string inputPipe = "[{$project: {_id: true, a: '$b'}}, {$match: {a: {$eq: 1}}}]"; + string outputPipe = "[{$match: {b: {$eq: 1}}}, {$project: {_id: true, a: '$b'}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + +TEST(PipelineOptimizationTest, MatchShouldMoveAcrossAddFieldsRename) { + string inputPipe = "[{$addFields: {a: '$b'}}, {$match: {a: {$eq: 1}}}]"; + string outputPipe = "[{$match: {b: {$eq: 1}}}, {$addFields: {a: '$b'}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + +TEST(PipelineOptimizationTest, MatchShouldMoveAcrossProjectRenameWithExplicitROOT) { + string inputPipe = "[{$project: {_id: true, a: '$$ROOT.b'}}, {$match: {a: {$eq: 1}}}]"; + string outputPipe = "[{$match: {b: {$eq: 1}}}, {$project: {_id: true, a: '$$ROOT.b'}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + +TEST(PipelineOptimizationTest, MatchShouldMoveAcrossAddFieldsRenameWithExplicitCURRENT) { + string inputPipe = "[{$addFields: {a: '$$CURRENT.b'}}, {$match: {a: {$eq: 1}}}]"; + string outputPipe = "[{$match: {b: {$eq: 1}}}, {$addFields: {a: '$b'}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + +TEST(PipelineOptimizationTest, PartiallyDependentMatchWithRenameShouldSplitAcrossAddFields) { + string inputPipe = + "[{$addFields: {'a.b': '$c', d: {$add: ['$e', '$f']}}}," + "{$match: {$and: [{$or: [{'a.b': 1}, {x: 2}]}, {d: 3}]}}]"; + string outputPipe = + "[{$match: {$or: [{c: {$eq: 1}}, {x: {$eq: 2}}]}}," + "{$addFields: {a: {b: '$c'}, d: {$add: ['$e', '$f']}}}," + "{$match: {d: {$eq: 3}}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + +TEST(PipelineOptimizationTest, NorCanSplitAcrossProjectWithRename) { + string inputPipe = + "[{$project: {_id: false, x: true, y: '$z'}}," + "{$match: {$nor: [{w: {$eq: 1}}, {y: {$eq: 1}}]}}]"; + string outputPipe = + "[{$match: {$nor: [{z: {$eq: 1}}]}}," + "{$project: {_id: false, x: true, y: '$z'}}," + "{$match: {$nor: [{w: {$eq: 1}}]}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + +TEST(PipelineOptimizationTest, MatchCanMoveAcrossSeveralRenames) { + string inputPipe = + "[{$project: {_id: false, c: '$d'}}," + "{$addFields: {b: '$c'}}," + "{$project: {a: '$b', z: 1}}," + "{$match: {a: 1, z: 2}}]"; + string outputPipe = + "[{$match: {d: {$eq: 1}}}," + "{$project: {_id: false, c: '$d'}}," + "{$match: {z: {$eq: 2}}}," + "{$addFields: {b: '$c'}}," + "{$project: {_id: true, z: true, a: '$b'}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + +TEST(PipelineOptimizationTest, RenameShouldNotBeAppliedToDependentMatch) { + string pipeline = + "[{$project: {_id: false, x: {$add: ['$foo', '$bar']}, y: '$z'}}," + "{$match: {$or: [{x: {$eq: 1}}, {y: {$eq: 1}}]}}]"; + assertPipelineOptimizesTo(pipeline, pipeline); +} + +TEST(PipelineOptimizationTest, MatchCannotMoveAcrossAddFieldsRenameOfDottedPath) { + string pipeline = "[{$addFields: {a: '$b.c'}}, {$match: {a: {$eq: 1}}}]"; + assertPipelineOptimizesTo(pipeline, pipeline); +} + +TEST(PipelineOptimizationTest, MatchCannotMoveAcrossProjectRenameOfDottedPath) { + string inputPipe = "[{$project: {_id: false, a: '$$CURRENT.b.c'}}, {$match: {a: {$eq: 1}}}]"; + string outputPipe = "[{$project: {_id: false, a: '$b.c'}}, {$match: {a: {$eq: 1}}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + } // namespace Local namespace Sharded { |