diff options
author | Henri Nikku <henri.nikku@mongodb.com> | 2022-07-05 08:48:26 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-07-05 10:01:41 +0000 |
commit | 71392af499bde03dae8d25b4407e204869dce5be (patch) | |
tree | 56c95ef652360bfec2c6451dc24cefbeb169039a /src | |
parent | 304a34e0c9b20800b60ad5af478e206ccf61932c (diff) | |
download | mongo-71392af499bde03dae8d25b4407e204869dce5be.tar.gz |
SERVER-61702 Implement stage rewrite optimizations for $graphLookup
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/pipeline/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_graph_lookup.cpp | 37 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_graph_lookup.h | 24 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_lookup.cpp | 44 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_test.cpp | 91 | ||||
-rw-r--r-- | src/mongo/db/pipeline/sort_reorder_helpers.cpp | 78 | ||||
-rw-r--r-- | src/mongo/db/pipeline/sort_reorder_helpers.h | 51 |
7 files changed, 265 insertions, 61 deletions
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index 4379213d7f0..47f5acb67d7 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -326,6 +326,7 @@ pipelineEnv.Library( 'semantic_analysis.cpp', 'sequential_document_cache.cpp', 'skip_and_limit.cpp', + 'sort_reorder_helpers.cpp', 'tee_buffer.cpp', 'visitors/document_source_walker.cpp', 'visitors/transformer_interface_walker.cpp', diff --git a/src/mongo/db/pipeline/document_source_graph_lookup.cpp b/src/mongo/db/pipeline/document_source_graph_lookup.cpp index 5957299c224..0f1ffbe7db7 100644 --- a/src/mongo/db/pipeline/document_source_graph_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_graph_lookup.cpp @@ -44,8 +44,8 @@ #include "mongo/db/pipeline/expression.h" #include "mongo/db/pipeline/expression_context.h" #include "mongo/db/pipeline/lite_parsed_pipeline.h" +#include "mongo/db/pipeline/sort_reorder_helpers.h" #include "mongo/db/query/query_knobs_gen.h" -#include "mongo/db/query/query_planner_common.h" #include "mongo/db/views/resolved_view.h" #include "mongo/logv2/log.h" @@ -520,6 +520,31 @@ DocumentSource::GetModPathsReturn DocumentSourceGraphLookUp::getModifiedPaths() return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths), {}}; } +StageConstraints DocumentSourceGraphLookUp::constraints(Pipeline::SplitState pipeState) const { + // If we are in a mongos, the from collection of the graphLookup is sharded, and the + // 'featureFlagShardedLookup' flag is enabled, the host type requirement is mongos or + // a shard. Otherwise, it's the primary shard. + HostTypeRequirement hostRequirement = + (pExpCtx->inMongos && pExpCtx->mongoProcessInterface->isSharded(pExpCtx->opCtx, _from) && + foreignShardedGraphLookupAllowed()) + ? HostTypeRequirement::kNone + : HostTypeRequirement::kPrimaryShard; + + StageConstraints constraints(StreamType::kStreaming, + PositionRequirement::kNone, + hostRequirement, + DiskUseRequirement::kNoDiskUse, + FacetRequirement::kAllowed, + TransactionRequirement::kAllowed, + LookupRequirement::kAllowed, + UnionRequirement::kAllowed); + + constraints.canSwapWithMatch = true; + constraints.canSwapWithSkippingOrLimitingStage = !_unwind; + + return constraints; +} + Pipeline::SourceContainer::iterator DocumentSourceGraphLookUp::doOptimizeAt( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { invariant(*itr == this); @@ -536,6 +561,16 @@ Pipeline::SourceContainer::iterator DocumentSourceGraphLookUp::doOptimizeAt( container->erase(std::next(itr)); return itr; } + + // If the following stage is $sort and there is no internal $unwind, consider pushing it ahead + // of $graphLookup. + if (!_unwind) { + itr = tryReorderingWithSort(itr, container); + if (*itr != this) { + return itr; + } + } + return std::next(itr); } diff --git a/src/mongo/db/pipeline/document_source_graph_lookup.h b/src/mongo/db/pipeline/document_source_graph_lookup.h index bc0146f1829..8f2eff1723c 100644 --- a/src/mongo/db/pipeline/document_source_graph_lookup.h +++ b/src/mongo/db/pipeline/document_source_graph_lookup.h @@ -104,29 +104,7 @@ public: */ GetModPathsReturn getModifiedPaths() const final; - StageConstraints constraints(Pipeline::SplitState pipeState) const final { - // If we are in a mongos, the from collection of the graphLookup is sharded, and the - // 'featureFlagShardedLookup' flag is enabled, the host type requirement is mongos or - // a shard. Otherwise, it's the primary shard. - HostTypeRequirement hostRequirement = - (pExpCtx->inMongos && - pExpCtx->mongoProcessInterface->isSharded(pExpCtx->opCtx, _from) && - foreignShardedGraphLookupAllowed()) - ? HostTypeRequirement::kNone - : HostTypeRequirement::kPrimaryShard; - - StageConstraints constraints(StreamType::kStreaming, - PositionRequirement::kNone, - hostRequirement, - DiskUseRequirement::kNoDiskUse, - FacetRequirement::kAllowed, - TransactionRequirement::kAllowed, - LookupRequirement::kAllowed, - UnionRequirement::kAllowed); - - constraints.canSwapWithMatch = true; - return constraints; - } + StageConstraints constraints(Pipeline::SplitState pipeState) const final; boost::optional<DistributedPlanLogic> distributedPlanLogic() final; diff --git a/src/mongo/db/pipeline/document_source_lookup.cpp b/src/mongo/db/pipeline/document_source_lookup.cpp index f29df62a1a0..4f9a2355aaf 100644 --- a/src/mongo/db/pipeline/document_source_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_lookup.cpp @@ -41,6 +41,7 @@ #include "mongo/db/pipeline/document_source_sort.h" #include "mongo/db/pipeline/expression.h" #include "mongo/db/pipeline/expression_context.h" +#include "mongo/db/pipeline/sort_reorder_helpers.h" #include "mongo/db/pipeline/variable_validation.h" #include "mongo/db/query/collation/collator_factory_interface.h" #include "mongo/db/query/query_knobs_gen.h" @@ -116,33 +117,6 @@ NamespaceString parseLookupFromAndResolveNamespace(const BSONElement& elem, return nss; } -/** - * Checks if a sort stage's pattern is suitable to push the stage before $lookup. The sort stage - * must not share the same prefix with any field created or modified by the lookup stage. - */ -bool checkModifiedPathsSortReorder(const SortPattern& sortPattern, - const DocumentSource::GetModPathsReturn& modPaths) { - for (const auto& sortKey : sortPattern) { - if (!sortKey.fieldPath.has_value()) { - return false; - } - if (sortKey.fieldPath->getPathLength() < 1) { - return false; - } - auto sortField = sortKey.fieldPath->getFieldName(0); - auto it = std::find_if( - modPaths.paths.begin(), modPaths.paths.end(), [&sortField](const auto& modPath) { - // Finds if the shorter path is a prefix field of or the same as the longer one. - return sortField == modPath || expression::isPathPrefixOf(sortField, modPath) || - expression::isPathPrefixOf(modPath, sortField); - }); - if (it != modPaths.paths.end()) { - return false; - } - } - return true; -} - } // namespace DocumentSourceLookUp::DocumentSourceLookUp( @@ -683,16 +657,12 @@ Pipeline::SourceContainer::iterator DocumentSourceLookUp::doOptimizeAt( return container->end(); } - // If the following stage is $sort, consider pushing it ahead of $lookup. - if (auto sortPtr = dynamic_cast<DocumentSourceSort*>(std::next(itr)->get())) { - // TODO (SERVER-55417): Conditionally reorder $sort and $lookup depending on whether the - // query planner allows for an index-provided sort. - if (!_unwindSrc && - checkModifiedPathsSortReorder(sortPtr->getSortKeyPattern(), getModifiedPaths())) { - // We have a sort not on as field following this stage. Reorder sort and current doc. - std::swap(*itr, *std::next(itr)); - - return itr == container->begin() ? itr : std::prev(itr); + // If the following stage is $sort and there is no internal $unwind, consider pushing it ahead + // of $lookup. + if (!_unwindSrc) { + itr = tryReorderingWithSort(itr, container); + if (*itr != this) { + return itr; } } diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp index 16833172312..b3545f78985 100644 --- a/src/mongo/db/pipeline/pipeline_test.cpp +++ b/src/mongo/db/pipeline/pipeline_test.cpp @@ -1867,6 +1867,97 @@ TEST(PipelineOptimizationTest, GraphLookupShouldSwapWithMatch) { assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); } +TEST(PipelineOptimizationTest, GraphLookupShouldSwapWithSortNotOnAs) { + string inputPipe = + "[" + " {$graphLookup: {" + " from: 'lookupColl'," + " as: 'out'," + " connectToField: 'to'," + " connectFromField: 'from'," + " startWith: '$start'" + " }}," + " {$sort: {from: 1}}" + "]"; + string outputPipe = + "[" + " {$sort: {sortKey: {from: 1}}}," + " {$graphLookup: {" + " from: 'lookupColl'," + " as: 'out'," + " connectToField: 'to'," + " connectFromField: 'from'," + " startWith: '$start'" + " }}" + "]"; + string serializedPipe = + "[" + " {$sort: {from: 1}}," + " {$graphLookup: {" + " from: 'lookupColl'," + " as: 'out'," + " connectToField: 'to'," + " connectFromField: 'from'," + " startWith: '$start'" + " }}" + "]"; + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, GraphLookupWithInternalUnwindShouldNotSwapWithSortNotOnAs) { + string inputPipe = + "[" + " {$graphLookup: {" + " from: 'lookupColl'," + " as: 'out'," + " connectToField: 'to'," + " connectFromField: 'from'," + " startWith: '$start'" + " }}," + " {$unwind: {path: '$out', includeArrayIndex: 'index'}}," + " {$sort: {from: 1}}" + "]"; + string outputPipe = + "[" + " {$graphLookup: {" + " from: 'lookupColl'," + " as: 'out'," + " connectToField: 'to'," + " connectFromField: 'from'," + " startWith: '$start'," + " unwinding: {preserveNullAndEmptyArrays: false, includeArrayIndex: 'index'}" + " }}," + " {$sort: {sortKey: {from: 1}}}" + "]"; + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, inputPipe); +} + +TEST(PipelineOptimizationTest, GraphLookupShouldNotSwapWithSortOnAs) { + string inputPipe = + "[" + " {$graphLookup: {" + " from: 'lookupColl'," + " as: 'out'," + " connectToField: 'to'," + " connectFromField: 'from'," + " startWith: '$start'" + " }}," + " {$sort: {out: 1}}" + "]"; + string outputPipe = + "[" + " {$graphLookup: {" + " from: 'lookupColl'," + " as: 'out'," + " connectToField: 'to'," + " connectFromField: 'from'," + " startWith: '$start'" + " }}," + " {$sort: {sortKey: {out: 1}}}" + "]"; + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, inputPipe); +} + TEST(PipelineOptimizationTest, ExclusionProjectShouldSwapWithIndependentMatch) { string inputPipe = "[{$project: {redacted: 0}}, {$match: {unrelated: 4}}]"; string outputPipe = diff --git a/src/mongo/db/pipeline/sort_reorder_helpers.cpp b/src/mongo/db/pipeline/sort_reorder_helpers.cpp new file mode 100644 index 00000000000..f8ff49f6eab --- /dev/null +++ b/src/mongo/db/pipeline/sort_reorder_helpers.cpp @@ -0,0 +1,78 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/pipeline/sort_reorder_helpers.h" + +namespace mongo { + +bool checkModifiedPathsSortReorder(const SortPattern& sortPattern, + const DocumentSource::GetModPathsReturn& modPaths) { + for (const auto& sortKey : sortPattern) { + if (!sortKey.fieldPath.has_value()) { + return false; + } + if (sortKey.fieldPath->getPathLength() < 1) { + return false; + } + auto sortField = sortKey.fieldPath->getFieldName(0); + auto it = std::find_if( + modPaths.paths.begin(), modPaths.paths.end(), [&sortField](const auto& modPath) { + // Finds if the shorter path is a prefix field of or the same as the longer one. + return sortField == modPath || expression::isPathPrefixOf(sortField, modPath) || + expression::isPathPrefixOf(modPath, sortField); + }); + if (it != modPaths.paths.end()) { + return false; + } + } + return true; +} + +Pipeline::SourceContainer::iterator tryReorderingWithSort(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) { + auto docSource = itr->get(); + invariant(dynamic_cast<DocumentSourceLookUp*>(docSource) || + dynamic_cast<DocumentSourceGraphLookUp*>(docSource)); + + // If we have $graphLookup or $lookup followed by $sort, and $sort does not use any fields + // created by it, they can swap. + // TODO (SERVER-55417): Conditionally reorder $sort and $lookup depending on whether the + // query planner allows for an index-provided sort. + auto nextSort = dynamic_cast<DocumentSourceSort*>(std::next(itr)->get()); + if (nextSort && + checkModifiedPathsSortReorder(nextSort->getSortKeyPattern(), + docSource->getModifiedPaths())) { + std::swap(*itr, *std::next(itr)); + return itr == container->begin() ? itr : std::prev(itr); + } + + return itr; +} + +} // namespace mongo diff --git a/src/mongo/db/pipeline/sort_reorder_helpers.h b/src/mongo/db/pipeline/sort_reorder_helpers.h new file mode 100644 index 00000000000..c5d3b6ca53a --- /dev/null +++ b/src/mongo/db/pipeline/sort_reorder_helpers.h @@ -0,0 +1,51 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/pipeline/document_source.h" +#include "mongo/db/pipeline/document_source_graph_lookup.h" +#include "mongo/db/pipeline/document_source_lookup.h" +#include "mongo/db/pipeline/pipeline.h" + +namespace mongo { + +/** + * Checks if a sort stage's pattern is suitable to push the stage before $lookup or $graphLookup. + * The sort stage must not share the same prefix with any field created or modified by the lookup + * stage. + */ +bool checkModifiedPathsSortReorder(const SortPattern& sortPattern, + const DocumentSource::GetModPathsReturn& modPaths); + +/** + * Tries to swap $lookup or $graphLookup with sort. + */ +Pipeline::SourceContainer::iterator tryReorderingWithSort(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container); + +} // namespace mongo |