summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorHenri Nikku <henri.nikku@mongodb.com>2022-07-05 08:48:26 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-07-05 10:01:41 +0000
commit71392af499bde03dae8d25b4407e204869dce5be (patch)
tree56c95ef652360bfec2c6451dc24cefbeb169039a /src
parent304a34e0c9b20800b60ad5af478e206ccf61932c (diff)
downloadmongo-71392af499bde03dae8d25b4407e204869dce5be.tar.gz
SERVER-61702 Implement stage rewrite optimizations for $graphLookup
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/pipeline/SConscript1
-rw-r--r--src/mongo/db/pipeline/document_source_graph_lookup.cpp37
-rw-r--r--src/mongo/db/pipeline/document_source_graph_lookup.h24
-rw-r--r--src/mongo/db/pipeline/document_source_lookup.cpp44
-rw-r--r--src/mongo/db/pipeline/pipeline_test.cpp91
-rw-r--r--src/mongo/db/pipeline/sort_reorder_helpers.cpp78
-rw-r--r--src/mongo/db/pipeline/sort_reorder_helpers.h51
7 files changed, 265 insertions, 61 deletions
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript
index 4379213d7f0..47f5acb67d7 100644
--- a/src/mongo/db/pipeline/SConscript
+++ b/src/mongo/db/pipeline/SConscript
@@ -326,6 +326,7 @@ pipelineEnv.Library(
'semantic_analysis.cpp',
'sequential_document_cache.cpp',
'skip_and_limit.cpp',
+ 'sort_reorder_helpers.cpp',
'tee_buffer.cpp',
'visitors/document_source_walker.cpp',
'visitors/transformer_interface_walker.cpp',
diff --git a/src/mongo/db/pipeline/document_source_graph_lookup.cpp b/src/mongo/db/pipeline/document_source_graph_lookup.cpp
index 5957299c224..0f1ffbe7db7 100644
--- a/src/mongo/db/pipeline/document_source_graph_lookup.cpp
+++ b/src/mongo/db/pipeline/document_source_graph_lookup.cpp
@@ -44,8 +44,8 @@
#include "mongo/db/pipeline/expression.h"
#include "mongo/db/pipeline/expression_context.h"
#include "mongo/db/pipeline/lite_parsed_pipeline.h"
+#include "mongo/db/pipeline/sort_reorder_helpers.h"
#include "mongo/db/query/query_knobs_gen.h"
-#include "mongo/db/query/query_planner_common.h"
#include "mongo/db/views/resolved_view.h"
#include "mongo/logv2/log.h"
@@ -520,6 +520,31 @@ DocumentSource::GetModPathsReturn DocumentSourceGraphLookUp::getModifiedPaths()
return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths), {}};
}
+StageConstraints DocumentSourceGraphLookUp::constraints(Pipeline::SplitState pipeState) const {
+ // If we are in a mongos, the from collection of the graphLookup is sharded, and the
+ // 'featureFlagShardedLookup' flag is enabled, the host type requirement is mongos or
+ // a shard. Otherwise, it's the primary shard.
+ HostTypeRequirement hostRequirement =
+ (pExpCtx->inMongos && pExpCtx->mongoProcessInterface->isSharded(pExpCtx->opCtx, _from) &&
+ foreignShardedGraphLookupAllowed())
+ ? HostTypeRequirement::kNone
+ : HostTypeRequirement::kPrimaryShard;
+
+ StageConstraints constraints(StreamType::kStreaming,
+ PositionRequirement::kNone,
+ hostRequirement,
+ DiskUseRequirement::kNoDiskUse,
+ FacetRequirement::kAllowed,
+ TransactionRequirement::kAllowed,
+ LookupRequirement::kAllowed,
+ UnionRequirement::kAllowed);
+
+ constraints.canSwapWithMatch = true;
+ constraints.canSwapWithSkippingOrLimitingStage = !_unwind;
+
+ return constraints;
+}
+
Pipeline::SourceContainer::iterator DocumentSourceGraphLookUp::doOptimizeAt(
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
invariant(*itr == this);
@@ -536,6 +561,16 @@ Pipeline::SourceContainer::iterator DocumentSourceGraphLookUp::doOptimizeAt(
container->erase(std::next(itr));
return itr;
}
+
+ // If the following stage is $sort and there is no internal $unwind, consider pushing it ahead
+ // of $graphLookup.
+ if (!_unwind) {
+ itr = tryReorderingWithSort(itr, container);
+ if (*itr != this) {
+ return itr;
+ }
+ }
+
return std::next(itr);
}
diff --git a/src/mongo/db/pipeline/document_source_graph_lookup.h b/src/mongo/db/pipeline/document_source_graph_lookup.h
index bc0146f1829..8f2eff1723c 100644
--- a/src/mongo/db/pipeline/document_source_graph_lookup.h
+++ b/src/mongo/db/pipeline/document_source_graph_lookup.h
@@ -104,29 +104,7 @@ public:
*/
GetModPathsReturn getModifiedPaths() const final;
- StageConstraints constraints(Pipeline::SplitState pipeState) const final {
- // If we are in a mongos, the from collection of the graphLookup is sharded, and the
- // 'featureFlagShardedLookup' flag is enabled, the host type requirement is mongos or
- // a shard. Otherwise, it's the primary shard.
- HostTypeRequirement hostRequirement =
- (pExpCtx->inMongos &&
- pExpCtx->mongoProcessInterface->isSharded(pExpCtx->opCtx, _from) &&
- foreignShardedGraphLookupAllowed())
- ? HostTypeRequirement::kNone
- : HostTypeRequirement::kPrimaryShard;
-
- StageConstraints constraints(StreamType::kStreaming,
- PositionRequirement::kNone,
- hostRequirement,
- DiskUseRequirement::kNoDiskUse,
- FacetRequirement::kAllowed,
- TransactionRequirement::kAllowed,
- LookupRequirement::kAllowed,
- UnionRequirement::kAllowed);
-
- constraints.canSwapWithMatch = true;
- return constraints;
- }
+ StageConstraints constraints(Pipeline::SplitState pipeState) const final;
boost::optional<DistributedPlanLogic> distributedPlanLogic() final;
diff --git a/src/mongo/db/pipeline/document_source_lookup.cpp b/src/mongo/db/pipeline/document_source_lookup.cpp
index f29df62a1a0..4f9a2355aaf 100644
--- a/src/mongo/db/pipeline/document_source_lookup.cpp
+++ b/src/mongo/db/pipeline/document_source_lookup.cpp
@@ -41,6 +41,7 @@
#include "mongo/db/pipeline/document_source_sort.h"
#include "mongo/db/pipeline/expression.h"
#include "mongo/db/pipeline/expression_context.h"
+#include "mongo/db/pipeline/sort_reorder_helpers.h"
#include "mongo/db/pipeline/variable_validation.h"
#include "mongo/db/query/collation/collator_factory_interface.h"
#include "mongo/db/query/query_knobs_gen.h"
@@ -116,33 +117,6 @@ NamespaceString parseLookupFromAndResolveNamespace(const BSONElement& elem,
return nss;
}
-/**
- * Checks if a sort stage's pattern is suitable to push the stage before $lookup. The sort stage
- * must not share the same prefix with any field created or modified by the lookup stage.
- */
-bool checkModifiedPathsSortReorder(const SortPattern& sortPattern,
- const DocumentSource::GetModPathsReturn& modPaths) {
- for (const auto& sortKey : sortPattern) {
- if (!sortKey.fieldPath.has_value()) {
- return false;
- }
- if (sortKey.fieldPath->getPathLength() < 1) {
- return false;
- }
- auto sortField = sortKey.fieldPath->getFieldName(0);
- auto it = std::find_if(
- modPaths.paths.begin(), modPaths.paths.end(), [&sortField](const auto& modPath) {
- // Finds if the shorter path is a prefix field of or the same as the longer one.
- return sortField == modPath || expression::isPathPrefixOf(sortField, modPath) ||
- expression::isPathPrefixOf(modPath, sortField);
- });
- if (it != modPaths.paths.end()) {
- return false;
- }
- }
- return true;
-}
-
} // namespace
DocumentSourceLookUp::DocumentSourceLookUp(
@@ -683,16 +657,12 @@ Pipeline::SourceContainer::iterator DocumentSourceLookUp::doOptimizeAt(
return container->end();
}
- // If the following stage is $sort, consider pushing it ahead of $lookup.
- if (auto sortPtr = dynamic_cast<DocumentSourceSort*>(std::next(itr)->get())) {
- // TODO (SERVER-55417): Conditionally reorder $sort and $lookup depending on whether the
- // query planner allows for an index-provided sort.
- if (!_unwindSrc &&
- checkModifiedPathsSortReorder(sortPtr->getSortKeyPattern(), getModifiedPaths())) {
- // We have a sort not on as field following this stage. Reorder sort and current doc.
- std::swap(*itr, *std::next(itr));
-
- return itr == container->begin() ? itr : std::prev(itr);
+ // If the following stage is $sort and there is no internal $unwind, consider pushing it ahead
+ // of $lookup.
+ if (!_unwindSrc) {
+ itr = tryReorderingWithSort(itr, container);
+ if (*itr != this) {
+ return itr;
}
}
diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp
index 16833172312..b3545f78985 100644
--- a/src/mongo/db/pipeline/pipeline_test.cpp
+++ b/src/mongo/db/pipeline/pipeline_test.cpp
@@ -1867,6 +1867,97 @@ TEST(PipelineOptimizationTest, GraphLookupShouldSwapWithMatch) {
assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
}
+TEST(PipelineOptimizationTest, GraphLookupShouldSwapWithSortNotOnAs) {
+ string inputPipe =
+ "["
+ " {$graphLookup: {"
+ " from: 'lookupColl',"
+ " as: 'out',"
+ " connectToField: 'to',"
+ " connectFromField: 'from',"
+ " startWith: '$start'"
+ " }},"
+ " {$sort: {from: 1}}"
+ "]";
+ string outputPipe =
+ "["
+ " {$sort: {sortKey: {from: 1}}},"
+ " {$graphLookup: {"
+ " from: 'lookupColl',"
+ " as: 'out',"
+ " connectToField: 'to',"
+ " connectFromField: 'from',"
+ " startWith: '$start'"
+ " }}"
+ "]";
+ string serializedPipe =
+ "["
+ " {$sort: {from: 1}},"
+ " {$graphLookup: {"
+ " from: 'lookupColl',"
+ " as: 'out',"
+ " connectToField: 'to',"
+ " connectFromField: 'from',"
+ " startWith: '$start'"
+ " }}"
+ "]";
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, GraphLookupWithInternalUnwindShouldNotSwapWithSortNotOnAs) {
+ string inputPipe =
+ "["
+ " {$graphLookup: {"
+ " from: 'lookupColl',"
+ " as: 'out',"
+ " connectToField: 'to',"
+ " connectFromField: 'from',"
+ " startWith: '$start'"
+ " }},"
+ " {$unwind: {path: '$out', includeArrayIndex: 'index'}},"
+ " {$sort: {from: 1}}"
+ "]";
+ string outputPipe =
+ "["
+ " {$graphLookup: {"
+ " from: 'lookupColl',"
+ " as: 'out',"
+ " connectToField: 'to',"
+ " connectFromField: 'from',"
+ " startWith: '$start',"
+ " unwinding: {preserveNullAndEmptyArrays: false, includeArrayIndex: 'index'}"
+ " }},"
+ " {$sort: {sortKey: {from: 1}}}"
+ "]";
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, inputPipe);
+}
+
+TEST(PipelineOptimizationTest, GraphLookupShouldNotSwapWithSortOnAs) {
+ string inputPipe =
+ "["
+ " {$graphLookup: {"
+ " from: 'lookupColl',"
+ " as: 'out',"
+ " connectToField: 'to',"
+ " connectFromField: 'from',"
+ " startWith: '$start'"
+ " }},"
+ " {$sort: {out: 1}}"
+ "]";
+ string outputPipe =
+ "["
+ " {$graphLookup: {"
+ " from: 'lookupColl',"
+ " as: 'out',"
+ " connectToField: 'to',"
+ " connectFromField: 'from',"
+ " startWith: '$start'"
+ " }},"
+ " {$sort: {sortKey: {out: 1}}}"
+ "]";
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, inputPipe);
+}
+
TEST(PipelineOptimizationTest, ExclusionProjectShouldSwapWithIndependentMatch) {
string inputPipe = "[{$project: {redacted: 0}}, {$match: {unrelated: 4}}]";
string outputPipe =
diff --git a/src/mongo/db/pipeline/sort_reorder_helpers.cpp b/src/mongo/db/pipeline/sort_reorder_helpers.cpp
new file mode 100644
index 00000000000..f8ff49f6eab
--- /dev/null
+++ b/src/mongo/db/pipeline/sort_reorder_helpers.cpp
@@ -0,0 +1,78 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/pipeline/sort_reorder_helpers.h"
+
+namespace mongo {
+
+bool checkModifiedPathsSortReorder(const SortPattern& sortPattern,
+ const DocumentSource::GetModPathsReturn& modPaths) {
+ for (const auto& sortKey : sortPattern) {
+ if (!sortKey.fieldPath.has_value()) {
+ return false;
+ }
+ if (sortKey.fieldPath->getPathLength() < 1) {
+ return false;
+ }
+ auto sortField = sortKey.fieldPath->getFieldName(0);
+ auto it = std::find_if(
+ modPaths.paths.begin(), modPaths.paths.end(), [&sortField](const auto& modPath) {
+ // Finds if the shorter path is a prefix field of or the same as the longer one.
+ return sortField == modPath || expression::isPathPrefixOf(sortField, modPath) ||
+ expression::isPathPrefixOf(modPath, sortField);
+ });
+ if (it != modPaths.paths.end()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+Pipeline::SourceContainer::iterator tryReorderingWithSort(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) {
+ auto docSource = itr->get();
+ invariant(dynamic_cast<DocumentSourceLookUp*>(docSource) ||
+ dynamic_cast<DocumentSourceGraphLookUp*>(docSource));
+
+ // If we have $graphLookup or $lookup followed by $sort, and $sort does not use any fields
+ // created by it, they can swap.
+ // TODO (SERVER-55417): Conditionally reorder $sort and $lookup depending on whether the
+ // query planner allows for an index-provided sort.
+ auto nextSort = dynamic_cast<DocumentSourceSort*>(std::next(itr)->get());
+ if (nextSort &&
+ checkModifiedPathsSortReorder(nextSort->getSortKeyPattern(),
+ docSource->getModifiedPaths())) {
+ std::swap(*itr, *std::next(itr));
+ return itr == container->begin() ? itr : std::prev(itr);
+ }
+
+ return itr;
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/pipeline/sort_reorder_helpers.h b/src/mongo/db/pipeline/sort_reorder_helpers.h
new file mode 100644
index 00000000000..c5d3b6ca53a
--- /dev/null
+++ b/src/mongo/db/pipeline/sort_reorder_helpers.h
@@ -0,0 +1,51 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/pipeline/document_source.h"
+#include "mongo/db/pipeline/document_source_graph_lookup.h"
+#include "mongo/db/pipeline/document_source_lookup.h"
+#include "mongo/db/pipeline/pipeline.h"
+
+namespace mongo {
+
+/**
+ * Checks if a sort stage's pattern is suitable to push the stage before $lookup or $graphLookup.
+ * The sort stage must not share the same prefix with any field created or modified by the lookup
+ * stage.
+ */
+bool checkModifiedPathsSortReorder(const SortPattern& sortPattern,
+ const DocumentSource::GetModPathsReturn& modPaths);
+
+/**
+ * Tries to swap $lookup or $graphLookup with sort.
+ */
+Pipeline::SourceContainer::iterator tryReorderingWithSort(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container);
+
+} // namespace mongo