summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSvilen Mihaylov <svilen.mihaylov@mongodb.com>2023-02-09 19:00:36 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-02-09 22:35:56 +0000
commit387bd9df945ade25b5955223f04db1917782d728 (patch)
tree8fb73c9a68e4be1155ff360c1a53e55e2d8dd6d0
parent9866a24068c721f4ef99bd66bb659053cc6738ed (diff)
downloadmongo-387bd9df945ade25b5955223f04db1917782d728.tar.gz
SERVER-73821 [CQF] Split path utilities from utils.cpp
-rw-r--r--src/mongo/db/pipeline/abt/agg_expression_visitor.cpp2
-rw-r--r--src/mongo/db/pipeline/abt/canonical_query_translation.cpp1
-rw-r--r--src/mongo/db/pipeline/abt/document_source_visitor.cpp5
-rw-r--r--src/mongo/db/pipeline/abt/field_map_builder.cpp2
-rw-r--r--src/mongo/db/pipeline/abt/field_map_builder.h1
-rw-r--r--src/mongo/db/pipeline/abt/match_expression_visitor.cpp2
-rw-r--r--src/mongo/db/pipeline/abt/utils.cpp2
-rw-r--r--src/mongo/db/query/ce/histogram_estimator.cpp1
-rw-r--r--src/mongo/db/query/optimizer/SConscript1
-rw-r--r--src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp1
-rw-r--r--src/mongo/db/query/optimizer/node.cpp1
-rw-r--r--src/mongo/db/query/optimizer/rewrites/path.cpp3
-rw-r--r--src/mongo/db/query/optimizer/utils/path_utils.cpp324
-rw-r--r--src/mongo/db/query/optimizer/utils/path_utils.h174
-rw-r--r--src/mongo/db/query/optimizer/utils/utils.cpp304
-rw-r--r--src/mongo/db/query/optimizer/utils/utils.h124
16 files changed, 514 insertions, 434 deletions
diff --git a/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp b/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp
index be4820d64a2..49dbb571d3c 100644
--- a/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp
+++ b/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp
@@ -37,7 +37,7 @@
#include "mongo/db/pipeline/accumulator.h"
#include "mongo/db/pipeline/accumulator_multi.h"
#include "mongo/db/pipeline/expression_walker.h"
-#include "mongo/db/query/optimizer/utils/utils.h"
+#include "mongo/db/query/optimizer/utils/path_utils.h"
namespace mongo::optimizer {
diff --git a/src/mongo/db/pipeline/abt/canonical_query_translation.cpp b/src/mongo/db/pipeline/abt/canonical_query_translation.cpp
index cd1af40041b..7f0be84bd6d 100644
--- a/src/mongo/db/pipeline/abt/canonical_query_translation.cpp
+++ b/src/mongo/db/pipeline/abt/canonical_query_translation.cpp
@@ -33,6 +33,7 @@
#include "mongo/db/pipeline/abt/collation_translation.h"
#include "mongo/db/pipeline/abt/match_expression_visitor.h"
#include "mongo/db/pipeline/abt/transformer_visitor.h"
+#include "mongo/db/query/optimizer/utils/path_utils.h"
namespace mongo::optimizer {
diff --git a/src/mongo/db/pipeline/abt/document_source_visitor.cpp b/src/mongo/db/pipeline/abt/document_source_visitor.cpp
index d109ee01eb6..2940bc9458b 100644
--- a/src/mongo/db/pipeline/abt/document_source_visitor.cpp
+++ b/src/mongo/db/pipeline/abt/document_source_visitor.cpp
@@ -38,11 +38,9 @@
#include "mongo/db/pipeline/document_source_bucket_auto.h"
#include "mongo/db/pipeline/document_source_coll_stats.h"
#include "mongo/db/pipeline/document_source_current_op.h"
-#include "mongo/db/pipeline/document_source_cursor.h"
#include "mongo/db/pipeline/document_source_exchange.h"
#include "mongo/db/pipeline/document_source_facet.h"
#include "mongo/db/pipeline/document_source_geo_near.h"
-#include "mongo/db/pipeline/document_source_geo_near_cursor.h"
#include "mongo/db/pipeline/document_source_graph_lookup.h"
#include "mongo/db/pipeline/document_source_group.h"
#include "mongo/db/pipeline/document_source_index_stats.h"
@@ -62,7 +60,6 @@
#include "mongo/db/pipeline/document_source_plan_cache_stats.h"
#include "mongo/db/pipeline/document_source_queue.h"
#include "mongo/db/pipeline/document_source_redact.h"
-#include "mongo/db/pipeline/document_source_replace_root.h"
#include "mongo/db/pipeline/document_source_sample.h"
#include "mongo/db/pipeline/document_source_sample_from_random_cursor.h"
#include "mongo/db/pipeline/document_source_sequential_document_cache.h"
@@ -76,7 +73,7 @@
#include "mongo/db/pipeline/visitors/document_source_visitor_registry_mongod.h"
#include "mongo/db/pipeline/visitors/document_source_walker.h"
#include "mongo/db/pipeline/visitors/transformer_interface_walker.h"
-#include "mongo/s/query/document_source_merge_cursors.h"
+#include "mongo/db/query/optimizer/utils/path_utils.h"
#include "mongo/util/string_map.h"
namespace mongo::optimizer {
diff --git a/src/mongo/db/pipeline/abt/field_map_builder.cpp b/src/mongo/db/pipeline/abt/field_map_builder.cpp
index 9fd23d34bba..55085817c92 100644
--- a/src/mongo/db/pipeline/abt/field_map_builder.cpp
+++ b/src/mongo/db/pipeline/abt/field_map_builder.cpp
@@ -28,6 +28,8 @@
*/
#include "mongo/db/pipeline/abt/field_map_builder.h"
+#include "mongo/db/query/optimizer/utils/path_utils.h"
+
namespace mongo::optimizer {
diff --git a/src/mongo/db/pipeline/abt/field_map_builder.h b/src/mongo/db/pipeline/abt/field_map_builder.h
index aee7d53f052..ba21193520e 100644
--- a/src/mongo/db/pipeline/abt/field_map_builder.h
+++ b/src/mongo/db/pipeline/abt/field_map_builder.h
@@ -31,7 +31,6 @@
#include "mongo/db/pipeline/field_path.h"
#include "mongo/db/query/optimizer/node.h"
-#include "mongo/db/query/optimizer/utils/utils.h"
namespace mongo::optimizer {
diff --git a/src/mongo/db/pipeline/abt/match_expression_visitor.cpp b/src/mongo/db/pipeline/abt/match_expression_visitor.cpp
index fcd88189e19..85e92eac62f 100644
--- a/src/mongo/db/pipeline/abt/match_expression_visitor.cpp
+++ b/src/mongo/db/pipeline/abt/match_expression_visitor.cpp
@@ -63,7 +63,7 @@
#include "mongo/db/pipeline/abt/agg_expression_visitor.h"
#include "mongo/db/pipeline/abt/expr_algebrizer_context.h"
#include "mongo/db/pipeline/abt/utils.h"
-#include "mongo/db/query/optimizer/utils/utils.h"
+#include "mongo/db/query/optimizer/utils/path_utils.h"
namespace mongo::optimizer {
diff --git a/src/mongo/db/pipeline/abt/utils.cpp b/src/mongo/db/pipeline/abt/utils.cpp
index 2051ad72c5b..a22424d3580 100644
--- a/src/mongo/db/pipeline/abt/utils.cpp
+++ b/src/mongo/db/pipeline/abt/utils.cpp
@@ -31,7 +31,7 @@
#include "mongo/db/exec/docval_to_sbeval.h"
#include "mongo/db/exec/sbe/values/bson.h"
-#include "mongo/db/query/optimizer/utils/utils.h"
+#include "mongo/db/query/optimizer/utils/path_utils.h"
namespace mongo::optimizer {
diff --git a/src/mongo/db/query/ce/histogram_estimator.cpp b/src/mongo/db/query/ce/histogram_estimator.cpp
index 67ed40f66d9..8f08eb2a065 100644
--- a/src/mongo/db/query/ce/histogram_estimator.cpp
+++ b/src/mongo/db/query/ce/histogram_estimator.cpp
@@ -41,6 +41,7 @@
#include "mongo/db/query/optimizer/utils/abt_hash.h"
#include "mongo/db/query/optimizer/utils/ce_math.h"
#include "mongo/db/query/optimizer/utils/memo_utils.h"
+#include "mongo/db/query/optimizer/utils/path_utils.h"
#include "mongo/logv2/log.h"
diff --git a/src/mongo/db/query/optimizer/SConscript b/src/mongo/db/query/optimizer/SConscript
index a73a71211b5..b7de866cf16 100644
--- a/src/mongo/db/query/optimizer/SConscript
+++ b/src/mongo/db/query/optimizer/SConscript
@@ -30,6 +30,7 @@ env.Library(
"utils/abt_hash.cpp",
"utils/ce_math.cpp",
"utils/interval_utils.cpp",
+ "utils/path_utils.cpp",
"utils/reftracker_utils.cpp",
"utils/utils.cpp",
],
diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
index 759f53b693e..01f2bc84c77 100644
--- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
+++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
@@ -31,6 +31,7 @@
#include "mongo/db/query/optimizer/cascades/rewriter_rules.h"
#include "mongo/db/query/optimizer/reference_tracker.h"
+#include "mongo/db/query/optimizer/utils/path_utils.h"
#include "mongo/db/query/optimizer/utils/reftracker_utils.h"
diff --git a/src/mongo/db/query/optimizer/node.cpp b/src/mongo/db/query/optimizer/node.cpp
index 133af61cd2d..09b43c619c8 100644
--- a/src/mongo/db/query/optimizer/node.cpp
+++ b/src/mongo/db/query/optimizer/node.cpp
@@ -31,6 +31,7 @@
#include <stack>
#include "mongo/db/query/optimizer/node.h"
+#include "mongo/db/query/optimizer/utils/path_utils.h"
#include "mongo/db/query/optimizer/utils/utils.h"
namespace mongo::optimizer {
diff --git a/src/mongo/db/query/optimizer/rewrites/path.cpp b/src/mongo/db/query/optimizer/rewrites/path.cpp
index b158fbc9a50..d3183bccd33 100644
--- a/src/mongo/db/query/optimizer/rewrites/path.cpp
+++ b/src/mongo/db/query/optimizer/rewrites/path.cpp
@@ -28,7 +28,8 @@
*/
#include "mongo/db/query/optimizer/rewrites/path.h"
-#include "mongo/db/query/optimizer/utils/utils.h"
+#include "mongo/db/query/optimizer/utils/path_utils.h"
+
namespace mongo::optimizer {
ABT::reference_type PathFusion::follow(ABT::reference_type n) {
diff --git a/src/mongo/db/query/optimizer/utils/path_utils.cpp b/src/mongo/db/query/optimizer/utils/path_utils.cpp
new file mode 100644
index 00000000000..8a97e15fb1c
--- /dev/null
+++ b/src/mongo/db/query/optimizer/utils/path_utils.cpp
@@ -0,0 +1,324 @@
+/**
+ * Copyright (C) 2023-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/optimizer/utils/path_utils.h"
+
+
+namespace mongo::optimizer {
+
+std::vector<ABT::reference_type> collectComposed(const ABT& n) {
+ if (auto comp = n.cast<PathComposeM>(); comp) {
+ auto lhs = collectComposed(comp->getPath1());
+ auto rhs = collectComposed(comp->getPath2());
+ lhs.insert(lhs.end(), rhs.begin(), rhs.end());
+ return lhs;
+ }
+ return {n.ref()};
+}
+
+// Helper function to count the size of a nested conjunction.
+size_t countComposed(const ABT& n) {
+ if (auto comp = n.cast<PathComposeM>()) {
+ return countComposed(comp->getPath1()) + countComposed(comp->getPath2());
+ }
+ return 1;
+}
+
+std::vector<ABT::reference_type> collectComposedBounded(const ABT& n, size_t maxDepth) {
+ if (countComposed(n) > maxDepth) {
+ return {n.ref()};
+ }
+ return collectComposed(n);
+}
+
+static ABT appendFieldPath(const FieldPathType& fieldPath, ABT input) {
+ for (size_t index = fieldPath.size(); index-- > 0;) {
+ input = make<PathGet>(fieldPath.at(index), std::move(input));
+ }
+ return input;
+}
+
+boost::optional<ABT> decomposeToFilterNodes(const ABT& input,
+ const ABT& path,
+ const ABT& pathInput,
+ const size_t minDepth,
+ const size_t maxDepth) {
+ ABT::reference_type subPathRef = path.ref();
+ FieldPathType fieldPath;
+ while (const auto newPath = subPathRef.cast<PathGet>()) {
+ fieldPath.push_back(newPath->name());
+ subPathRef = newPath->getPath().ref();
+ }
+
+ ABT subPath = subPathRef;
+ if (auto composition = collectComposedBounded(subPath, maxDepth);
+ composition.size() >= minDepth) {
+ // Remove the path composition and insert two filter nodes.
+ ABT result = input;
+ for (const auto& element : composition) {
+ result =
+ make<FilterNode>(make<EvalFilter>(appendFieldPath(fieldPath, element), pathInput),
+ std::move(result));
+ }
+ return result;
+ }
+
+ return boost::none;
+}
+
+bool isSimplePath(const ABT& node) {
+ if (auto getPtr = node.cast<PathGet>();
+ getPtr != nullptr && getPtr->getPath().is<PathIdentity>()) {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * Removes Traverse nodes from a single path, using MultikeynessTrie which tells us
+ * which child paths are never applied to an array.
+ */
+class MultikeynessSimplifier {
+public:
+ bool operator()(ABT&, PathIdentity&, const MultikeynessTrie&, bool /*skippedParentTraverse*/) {
+ // No simplifications apply here.
+ return false;
+ }
+
+ bool operator()(ABT& path,
+ PathGet& get,
+ const MultikeynessTrie& trie,
+ bool skippedParentTraverse) {
+ if (auto it = trie.children.find(get.name()); it != trie.children.end()) {
+ return get.getPath().visit(*this, it->second, false /*skippedParentTraverse*/);
+ } else {
+ return false;
+ }
+ }
+
+ bool operator()(ABT& path,
+ PathTraverse& traverse,
+ const MultikeynessTrie& trie,
+ bool skippedParentTraverse) {
+ tassert(6859603,
+ "Unexpected maxDepth for Traverse in MultikeynessSimplifier",
+ traverse.getMaxDepth() == PathTraverse::kSingleLevel);
+
+ if (!trie.isMultiKey) {
+ // This path is never applied to an array: we can remove any number of Traverse nodes,
+ // of any maxDepth.
+ path = std::exchange(traverse.getPath(), make<Blackhole>());
+ // The parent can't have been a Traverse that we skipped, because we would have
+ // removed it, because !trie.isMultiKey.
+ invariant(!skippedParentTraverse);
+ path.visit(*this, trie, false /*skippedParentTraverse*/);
+ return true;
+ } else if (traverse.getMaxDepth() == PathTraverse::kSingleLevel && !skippedParentTraverse) {
+ // This path is possibly multikey, so we can't remove any Traverse nodes.
+ // But each edge in the trie represents a 'Traverse [1] Get [a]', so we can
+ // skip a single Traverse [1] node.
+ return traverse.getPath().visit(*this, trie, true /*skippedParentTraverse*/);
+ } else {
+ // We have no information about multikeyness of the child path.
+ return false;
+ }
+ }
+
+ bool operator()(ABT& path,
+ PathLambda& pathLambda,
+ const MultikeynessTrie& trie,
+ bool skippedParentTraverse) {
+ // Look for PathLambda Lambda [tmp] UnaryOp [Not] EvalFilter <path> Variable [tmp],
+ // and simplify <path>. This works because 'tmp' is the same variable name in both places,
+ // so <path> is applied to the same input as the PathLambda. (And the 'trie' tells us
+ // which parts of that input are not arrays.)
+
+ // In the future we may want to generalize this to skip over other expressions besides Not,
+ // as long as the Lambda and EvalFilter are connected by a variable.
+
+ if (auto* lambda = pathLambda.getLambda().cast<LambdaAbstraction>()) {
+ if (auto* unary = lambda->getBody().cast<UnaryOp>();
+ unary && unary->op() == Operations::Not) {
+ if (auto* evalFilter = unary->getChild().cast<EvalFilter>()) {
+ if (auto* variable = evalFilter->getInput().cast<Variable>();
+ variable && variable->name() == lambda->varName()) {
+ return evalFilter->getPath().visit(
+ *this, trie, false /*skippedParentTraverse*/);
+ }
+ }
+ }
+ }
+ return false;
+ }
+
+ bool operator()(ABT& path,
+ PathComposeM& compose,
+ const MultikeynessTrie& trie,
+ bool skippedParentTraverse) {
+ const bool simplified1 = compose.getPath1().visit(*this, trie, skippedParentTraverse);
+ const bool simplified2 = compose.getPath2().visit(*this, trie, skippedParentTraverse);
+ return simplified1 || simplified2;
+ }
+
+ template <typename T, typename... Ts>
+ bool operator()(ABT& n, T& /*node*/, Ts&&...) {
+ // Don't optimize a node we don't recognize.
+ return false;
+
+ // Some other cases to consider:
+ // - Remove PathArr for non-multikey paths.
+ // - Descend into disjunction.
+ // - Descend into PathLambda and simplify expressions, especially Not and EvalFilter.
+ }
+
+ static bool simplify(ABT& path, const MultikeynessTrie& trie) {
+ MultikeynessSimplifier instance;
+ return path.visit(instance, trie, false /*skippedParentTraverse*/);
+ }
+};
+
+bool simplifyTraverseNonArray(ABT& path, const MultikeynessTrie& multikeynessTrie) {
+ return MultikeynessSimplifier::simplify(path, multikeynessTrie);
+}
+
+class IndexPathFusor {
+public:
+ /**
+ * 'n' - The complete index path being compared to, can be modified if needed.
+ * 'node' - Same as 'n' but cast to a specific type by the caller in order to invoke the
+ * correct operator.
+ * 'other' - The query path, of which the index may satisfy a prefix.
+ */
+ IndexFusionResult operator()(const ABT& n, const PathGet& node, const ABT& other) {
+ if (auto otherGet = other.cast<PathGet>();
+ otherGet != nullptr && otherGet->name() == node.name()) {
+ if (auto otherChildTraverse = otherGet->getPath().cast<PathTraverse>();
+ otherChildTraverse != nullptr && !node.getPath().is<PathTraverse>()) {
+ // If a query path has a Traverse, but the index path doesn't, the query can
+ // still be evaluated by this index. Skip the Traverse node, and continue matching.
+ // This works because we know the Traverse will never be applied to an array,
+ // so 'Traverse [anything] p == p'.
+ auto result = node.getPath().visit(*this, otherChildTraverse->getPath());
+ result._numTraversesSkipped++;
+ return result;
+ } else {
+ return node.getPath().visit(*this, otherGet->getPath());
+ }
+ }
+ return {};
+ }
+
+ IndexFusionResult operator()(const ABT& n, const PathTraverse& node, const ABT& other) {
+ if (auto otherTraverse = other.cast<PathTraverse>();
+ otherTraverse != nullptr && otherTraverse->getMaxDepth() == node.getMaxDepth()) {
+ auto result = node.getPath().visit(*this, otherTraverse->getPath());
+ result._numTraversesFused++;
+ return result;
+ }
+ return {};
+ }
+
+ IndexFusionResult operator()(const ABT& n, const PathIdentity& node, const ABT& other) {
+ return {other.ref()};
+ }
+
+ template <typename T, typename... Ts>
+ IndexFusionResult operator()(const ABT& /*n*/, const T& /*node*/, Ts&&...) {
+ uasserted(6624152, "Unexpected node type");
+ }
+};
+
+IndexFusionResult fuseIndexPath(const ABT& node, const ABT& candidatePrefix) {
+ IndexPathFusor instance;
+ return candidatePrefix.visit(instance, node);
+}
+
+/**
+ * Check if an index path contains a Traverse element.
+ */
+class PathTraverseChecker {
+public:
+ PathTraverseChecker() {}
+
+ bool transport(const ABT& /*n*/, const PathTraverse& /*node*/, bool /*childResult*/) {
+ return true;
+ }
+
+ bool transport(const ABT& /*n*/, const PathGet& /*node*/, bool childResult) {
+ return childResult;
+ }
+
+ bool transport(const ABT& /*n*/, const PathIdentity& /*node*/) {
+ return false;
+ }
+
+ template <typename T, typename... Ts>
+ bool transport(const ABT& /*n*/, const T& /*node*/, Ts&&...) {
+ uasserted(6624153, "Index paths only consist of Get, Traverse, and Id nodes.");
+ return false;
+ }
+
+ bool check(const ABT& path) {
+ return algebra::transport<true>(path, *this);
+ }
+};
+
+bool checkPathContainsTraverse(const ABT& path) {
+ return PathTraverseChecker{}.check(path);
+}
+
+/**
+ * Checks if a path ends in a Traverse + PathId.
+ */
+class PathEndsInTraverseId {
+public:
+ bool transport(const optimizer::PathTraverse& node, bool childResult) {
+ return node.getPath().is<PathIdentity>() || childResult;
+ }
+
+ bool transport(const optimizer::PathGet& /*node*/, bool childResult) {
+ return childResult;
+ }
+
+ bool transport(const optimizer::PathIdentity& /*node*/) {
+ return false;
+ }
+
+ template <typename T, typename... Ts>
+ bool transport(const T& node, Ts&&... /* args */) {
+ uasserted(6749500, "Unexpected node in transport to check if path is $elemMatch.");
+ }
+};
+
+bool pathEndsInTraverse(const optimizer::ABT& path) {
+ PathEndsInTraverseId t;
+ return optimizer::algebra::transport<false>(path, t);
+}
+
+} // namespace mongo::optimizer
diff --git a/src/mongo/db/query/optimizer/utils/path_utils.h b/src/mongo/db/query/optimizer/utils/path_utils.h
new file mode 100644
index 00000000000..c5e33bf6e39
--- /dev/null
+++ b/src/mongo/db/query/optimizer/utils/path_utils.h
@@ -0,0 +1,174 @@
+/**
+ * Copyright (C) 2023-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/query/optimizer/defs.h"
+#include "mongo/db/query/optimizer/node.h"
+
+
+namespace mongo::optimizer {
+
+/**
+ * Returns a vector all paths nested under conjunctions (PathComposeM) in the given path.
+ * For example, PathComposeM(PathComposeM(Foo, Bar), Baz) returns [Foo, Bar, Baz].
+ * If the given path is not a conjunction, returns a vector with the given path.
+ */
+std::vector<ABT::reference_type> collectComposed(const ABT& n);
+
+/**
+ * Like collectComposed() but bounded by a maximum number of composed paths.
+ * If the given path has more PathComposeM;s than specified by maxDepth, then return a vector
+ * with the given path. Otherwise, returns the result of collectComposed().
+ *
+ * This is useful for preventing the optimizer from unintentionally creating a very deep tree which
+ * causes stack-overflow on a recursive traversal.
+ */
+std::vector<ABT::reference_type> collectComposedBounded(const ABT& n, size_t maxDepth);
+
+/**
+ * De-compose a path and an input to an EvalFilter into sequence of Filter nodes. If we have a path
+ * with a prefix of PathGet's followed by a series of nested PathComposeM, then split into two or
+ * more filter nodes at the composition and retain the prefix for each. The result is a tree of
+ * chained filter nodes. We return an empty result if we have less than "minDepth" sub-tress which
+ * are composed. If "minDepth" = 1, then we are guaranteed to return a result, which will consist of
+ * a single Filter node.
+ *
+ * If the number of compositions exceeds "maxDepth" then we return the a single FilterNode
+ * consisting of an EvalFilter over the original path and input.
+ *
+ * TODO: SERVER-73744. Consolidate usages in a new optimizer phase.
+ */
+constexpr size_t kMaxPathConjunctionDecomposition = 20;
+boost::optional<ABT> decomposeToFilterNodes(const ABT& input,
+ const ABT& path,
+ const ABT& pathInput,
+ size_t minDepth,
+ size_t maxDepth = kMaxPathConjunctionDecomposition);
+
+/**
+ * Returns true if the path represented by 'node' is of the form PathGet "field" PathId
+ */
+bool isSimplePath(const ABT& node);
+
+template <class Element = PathComposeM>
+inline void maybeComposePath(ABT& composition, ABT child) {
+ if (child.is<PathIdentity>()) {
+ return;
+ }
+ if (composition.is<PathIdentity>()) {
+ composition = std::move(child);
+ return;
+ }
+
+ composition = make<Element>(std::move(composition), std::move(child));
+}
+
+/**
+ * Creates a balanced tree of composition elements over the input vector which it modifies in place.
+ * In the end at most one element remains in the vector.
+ */
+template <class Element = PathComposeM>
+inline void maybeComposePaths(ABTVector& paths) {
+ while (paths.size() > 1) {
+ const size_t half = paths.size() / 2;
+ for (size_t i = 0; i < half; i++) {
+ maybeComposePath<Element>(paths.at(i), std::move(paths.back()));
+ paths.pop_back();
+ }
+ }
+}
+
+/**
+ * Appends a path to another path. Performs the append at PathIdentity elements.
+ */
+class PathAppender {
+public:
+ PathAppender(ABT suffix) : _suffix(std::move(suffix)) {}
+
+ void transport(ABT& n, const PathIdentity& node) {
+ n = _suffix;
+ }
+
+ template <typename T, typename... Ts>
+ void transport(ABT& /*n*/, const T& /*node*/, Ts&&...) {
+ // noop
+ }
+
+ /**
+ * Concatenate 'prefix' and 'suffix' by modifying 'prefix' in place.
+ */
+ static void appendInPlace(ABT& prefix, ABT suffix) {
+ PathAppender instance{std::move(suffix)};
+ algebra::transport<true>(prefix, instance);
+ }
+
+ /**
+ * Return the concatenation of 'prefix' and 'suffix'.
+ */
+ [[nodiscard]] static ABT append(ABT prefix, ABT suffix) {
+ appendInPlace(prefix, std::move(suffix));
+ return prefix;
+ }
+
+private:
+ ABT _suffix;
+};
+
+/**
+ * Given a path and a MultikeynessTrie describing the path's input,
+ * removes any Traverse nodes that we know will never encounter an array.
+ *
+ * Returns true if any changes were made to the ABT.
+ */
+bool simplifyTraverseNonArray(ABT& path, const MultikeynessTrie& multikeynessTrie);
+
+/**
+ * Fuses an index path and a query path to determine a residual path to apply over the index
+ * results. Checks if one index path is a prefix of another. Considers only Get, Traverse, and Id.
+ * Return the suffix that doesn't match.
+ */
+struct IndexFusionResult {
+ boost::optional<ABT::reference_type> _suffix;
+ size_t _numTraversesSkipped = 0;
+ size_t _numTraversesFused = 0;
+};
+IndexFusionResult fuseIndexPath(const ABT& node, const ABT& candidatePrefix);
+
+/**
+ * Check if a path contains a Traverse element.
+ */
+bool checkPathContainsTraverse(const ABT& path);
+
+/**
+ * This helper checks to see if we have a PathTraverse + PathId at the end of the path.
+ */
+bool pathEndsInTraverse(const optimizer::ABT& path);
+
+} // namespace mongo::optimizer
diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp
index f6de52ecc7c..3c12f562616 100644
--- a/src/mongo/db/query/optimizer/utils/utils.cpp
+++ b/src/mongo/db/query/optimizer/utils/utils.cpp
@@ -36,77 +36,11 @@
#include "mongo/db/query/optimizer/syntax/syntax.h"
#include "mongo/db/query/optimizer/utils/ce_math.h"
#include "mongo/db/query/optimizer/utils/interval_utils.h"
+#include "mongo/db/query/optimizer/utils/path_utils.h"
#include "mongo/db/storage/storage_parameters_gen.h"
-namespace mongo::optimizer {
-
-std::vector<ABT::reference_type> collectComposed(const ABT& n) {
- if (auto comp = n.cast<PathComposeM>(); comp) {
- auto lhs = collectComposed(comp->getPath1());
- auto rhs = collectComposed(comp->getPath2());
- lhs.insert(lhs.end(), rhs.begin(), rhs.end());
- return lhs;
- }
- return {n.ref()};
-}
-
-// Helper function to count the size of a nested conjunction.
-size_t countComposed(const ABT& n) {
- if (auto comp = n.cast<PathComposeM>()) {
- return countComposed(comp->getPath1()) + countComposed(comp->getPath2());
- }
- return 1;
-}
-
-std::vector<ABT::reference_type> collectComposedBounded(const ABT& n, size_t maxDepth) {
- if (countComposed(n) > maxDepth) {
- return {n.ref()};
- }
- return collectComposed(n);
-}
-
-static ABT appendFieldPath(const FieldPathType& fieldPath, ABT input) {
- for (size_t index = fieldPath.size(); index-- > 0;) {
- input = make<PathGet>(fieldPath.at(index), std::move(input));
- }
- return input;
-}
-
-boost::optional<ABT> decomposeToFilterNodes(const ABT& input,
- const ABT& path,
- const ABT& pathInput,
- const size_t minDepth,
- const size_t maxDepth) {
- ABT::reference_type subPathRef = path.ref();
- FieldPathType fieldPath;
- while (const auto newPath = subPathRef.cast<PathGet>()) {
- fieldPath.push_back(newPath->name());
- subPathRef = newPath->getPath().ref();
- }
-
- ABT subPath = subPathRef;
- if (auto composition = collectComposedBounded(subPath, maxDepth);
- composition.size() >= minDepth) {
- // Remove the path composition and insert two filter nodes.
- ABT result = input;
- for (const auto& element : composition) {
- result =
- make<FilterNode>(make<EvalFilter>(appendFieldPath(fieldPath, element), pathInput),
- std::move(result));
- }
- return result;
- }
-
- return boost::none;
-}
-bool isSimplePath(const ABT& node) {
- if (auto getPtr = node.cast<PathGet>();
- getPtr != nullptr && getPtr->getPath().is<PathIdentity>()) {
- return true;
- }
- return false;
-}
+namespace mongo::optimizer {
ProjectionNameOrderedSet convertToOrderedSet(ProjectionNameSet unordered) {
ProjectionNameOrderedSet ordered;
@@ -751,147 +685,6 @@ boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq(
return result;
}
-/**
- * Check if an index path contains a Traverse element.
- */
-class PathTraverseChecker {
-public:
- PathTraverseChecker() {}
-
- bool transport(const ABT& /*n*/, const PathTraverse& /*node*/, bool /*childResult*/) {
- return true;
- }
-
- bool transport(const ABT& /*n*/, const PathGet& /*node*/, bool childResult) {
- return childResult;
- }
-
- bool transport(const ABT& /*n*/, const PathIdentity& /*node*/) {
- return false;
- }
-
- template <typename T, typename... Ts>
- bool transport(const ABT& /*n*/, const T& /*node*/, Ts&&...) {
- uasserted(6624153, "Index paths only consist of Get, Traverse, and Id nodes.");
- return false;
- }
-
- bool check(const ABT& path) {
- return algebra::transport<true>(path, *this);
- }
-};
-
-bool checkPathContainsTraverse(const ABT& path) {
- return PathTraverseChecker{}.check(path);
-}
-
-/**
- * Removes Traverse nodes from a single path, using MultikeynessTrie which tells us
- * which child paths are never applied to an array.
- */
-class MultikeynessSimplifier {
-public:
- bool operator()(ABT&, PathIdentity&, const MultikeynessTrie&, bool /*skippedParentTraverse*/) {
- // No simplifications apply here.
- return false;
- }
-
- bool operator()(ABT& path,
- PathGet& get,
- const MultikeynessTrie& trie,
- bool skippedParentTraverse) {
- if (auto it = trie.children.find(get.name()); it != trie.children.end()) {
- return get.getPath().visit(*this, it->second, false /*skippedParentTraverse*/);
- } else {
- return false;
- }
- }
-
- bool operator()(ABT& path,
- PathTraverse& traverse,
- const MultikeynessTrie& trie,
- bool skippedParentTraverse) {
- tassert(6859603,
- "Unexpected maxDepth for Traverse in MultikeynessSimplifier",
- traverse.getMaxDepth() == PathTraverse::kSingleLevel);
-
- if (!trie.isMultiKey) {
- // This path is never applied to an array: we can remove any number of Traverse nodes,
- // of any maxDepth.
- path = std::exchange(traverse.getPath(), make<Blackhole>());
- // The parent can't have been a Traverse that we skipped, because we would have
- // removed it, because !trie.isMultiKey.
- invariant(!skippedParentTraverse);
- path.visit(*this, trie, false /*skippedParentTraverse*/);
- return true;
- } else if (traverse.getMaxDepth() == PathTraverse::kSingleLevel && !skippedParentTraverse) {
- // This path is possibly multikey, so we can't remove any Traverse nodes.
- // But each edge in the trie represents a 'Traverse [1] Get [a]', so we can
- // skip a single Traverse [1] node.
- return traverse.getPath().visit(*this, trie, true /*skippedParentTraverse*/);
- } else {
- // We have no information about multikeyness of the child path.
- return false;
- }
- }
-
- bool operator()(ABT& path,
- PathLambda& pathLambda,
- const MultikeynessTrie& trie,
- bool skippedParentTraverse) {
- // Look for PathLambda Lambda [tmp] UnaryOp [Not] EvalFilter <path> Variable [tmp],
- // and simplify <path>. This works because 'tmp' is the same variable name in both places,
- // so <path> is applied to the same input as the PathLambda. (And the 'trie' tells us
- // which parts of that input are not arrays.)
-
- // In the future we may want to generalize this to skip over other expressions besides Not,
- // as long as the Lambda and EvalFilter are connected by a variable.
-
- if (auto* lambda = pathLambda.getLambda().cast<LambdaAbstraction>()) {
- if (auto* unary = lambda->getBody().cast<UnaryOp>();
- unary && unary->op() == Operations::Not) {
- if (auto* evalFilter = unary->getChild().cast<EvalFilter>()) {
- if (auto* variable = evalFilter->getInput().cast<Variable>();
- variable && variable->name() == lambda->varName()) {
- return evalFilter->getPath().visit(
- *this, trie, false /*skippedParentTraverse*/);
- }
- }
- }
- }
- return false;
- }
-
- bool operator()(ABT& path,
- PathComposeM& compose,
- const MultikeynessTrie& trie,
- bool skippedParentTraverse) {
- const bool simplified1 = compose.getPath1().visit(*this, trie, skippedParentTraverse);
- const bool simplified2 = compose.getPath2().visit(*this, trie, skippedParentTraverse);
- return simplified1 || simplified2;
- }
-
- template <typename T, typename... Ts>
- bool operator()(ABT& n, T& /*node*/, Ts&&...) {
- // Don't optimize a node we don't recognize.
- return false;
-
- // Some other cases to consider:
- // - Remove PathArr for non-multikey paths.
- // - Descend into disjunction.
- // - Descend into PathLambda and simplify expressions, especially Not and EvalFilter.
- }
-
- static bool simplify(ABT& path, const MultikeynessTrie& trie) {
- MultikeynessSimplifier instance;
- return path.visit(instance, trie, false /*skippedParentTraverse*/);
- }
-};
-
-bool simplifyTraverseNonArray(ABT& path, const MultikeynessTrie& multikeynessTrie) {
- return MultikeynessSimplifier::simplify(path, multikeynessTrie);
-}
-
bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName,
const MultikeynessTrie& multikeynessTrie,
PartialSchemaRequirements& reqMap,
@@ -1162,69 +955,6 @@ const ProjectionName& getExistingOrTempProjForFieldName(PrefixId& prefixId,
}
/**
- * Fuses an index path and a query path to determine a residual path to apply over the index
- * results. Checks if one index path is a prefix of another. Considers only Get, Traverse, and Id.
- * Return the suffix that doesn't match.
- */
-class IndexPathFusor {
-public:
- struct ResultType {
- boost::optional<ABT::reference_type> _suffix;
- size_t _numTraversesSkipped = 0;
- size_t _numTraversesFused = 0;
- };
-
- /**
- * 'n' - The complete index path being compared to, can be modified if needed.
- * 'node' - Same as 'n' but cast to a specific type by the caller in order to invoke the
- * correct operator.
- * 'other' - The query path, of which the index may satisfy a prefix.
- */
- ResultType operator()(const ABT& n, const PathGet& node, const ABT& other) {
- if (auto otherGet = other.cast<PathGet>();
- otherGet != nullptr && otherGet->name() == node.name()) {
- if (auto otherChildTraverse = otherGet->getPath().cast<PathTraverse>();
- otherChildTraverse != nullptr && !node.getPath().is<PathTraverse>()) {
- // If a query path has a Traverse, but the index path doesn't, the query can
- // still be evaluated by this index. Skip the Traverse node, and continue matching.
- // This works because we know the Traverse will never be applied to an array,
- // so 'Traverse [anything] p == p'.
- auto result = node.getPath().visit(*this, otherChildTraverse->getPath());
- result._numTraversesSkipped++;
- return result;
- } else {
- return node.getPath().visit(*this, otherGet->getPath());
- }
- }
- return {};
- }
-
- ResultType operator()(const ABT& n, const PathTraverse& node, const ABT& other) {
- if (auto otherTraverse = other.cast<PathTraverse>();
- otherTraverse != nullptr && otherTraverse->getMaxDepth() == node.getMaxDepth()) {
- auto result = node.getPath().visit(*this, otherTraverse->getPath());
- result._numTraversesFused++;
- return result;
- }
- return {};
- }
-
- ResultType operator()(const ABT& n, const PathIdentity& node, const ABT& other) {
- return {other.ref()};
- }
-
- template <typename T, typename... Ts>
- ResultType operator()(const ABT& /*n*/, const T& /*node*/, Ts&&...) {
- uasserted(6624152, "Unexpected node type");
- }
-
- static ResultType fuse(const ABT& node, const ABT& candidatePrefix) {
- IndexPathFusor instance;
- return candidatePrefix.visit(instance, node);
- }
-};
-
-/**
* Pad compound interval with supplied simple interval.
*/
static void extendCompoundInterval(const IndexCollationSpec& indexCollationSpec,
@@ -1394,7 +1124,7 @@ static bool computeCandidateIndexEntry(PrefixId& prefixId,
for (size_t indexField = 0; indexField < indexCollationSpec.size(); indexField++) {
if (const auto fusedPath =
- IndexPathFusor::fuse(queryKey._path, indexCollationSpec.at(indexField)._path);
+ fuseIndexPath(queryKey._path, indexCollationSpec.at(indexField)._path);
fusedPath._suffix &&
(allowFuseTraverse[indexField] || fusedPath._numTraversesFused == 0)) {
if (fusedPath._numTraversesFused > 0) {
@@ -2612,34 +2342,6 @@ PhysPlanBuilder lowerEqPrefixes(PrefixId& prefixId,
return lowerTransport.lower(eqPrefixes.at(eqPrefixIndex)._interval);
}
-/**
- * Checks if a path ends in a Traverse + PathId.
- */
-class PathEndsInTraverseId {
-public:
- bool transport(const optimizer::PathTraverse& node, bool childResult) {
- return node.getPath().is<PathIdentity>() || childResult;
- }
-
- bool transport(const optimizer::PathGet& /*node*/, bool childResult) {
- return childResult;
- }
-
- bool transport(const optimizer::PathIdentity& /*node*/) {
- return false;
- }
-
- template <typename T, typename... Ts>
- bool transport(const T& node, Ts&&... /* args */) {
- uasserted(6749500, "Unexpected node in transport to check if path is $elemMatch.");
- }
-};
-
-bool pathEndsInTraverse(const optimizer::ABT& path) {
- PathEndsInTraverseId t;
- return optimizer::algebra::transport<false>(path, t);
-}
-
bool hasProperIntervals(const PartialSchemaRequirements& reqMap) {
// Compute if this node has any proper (not fully open) intervals.
for (const auto& [key, req] : reqMap.conjuncts()) {
diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h
index 59ba2f30542..3d6e167580a 100644
--- a/src/mongo/db/query/optimizer/utils/utils.h
+++ b/src/mongo/db/query/optimizer/utils/utils.h
@@ -66,76 +66,6 @@ inline size_t computeHashSeq(const Args&... seq) {
}
/**
- * Returns a vector all paths nested under conjunctions (PathComposeM) in the given path.
- * For example, PathComposeM(PathComposeM(Foo, Bar), Baz) returns [Foo, Bar, Baz].
- * If the given path is not a conjunction, returns a vector with the given path.
- */
-std::vector<ABT::reference_type> collectComposed(const ABT& n);
-
-/**
- * Like collectComposed() but bounded by a maximum number of composed paths.
- * If the given path has more PathComposeM;s than specified by maxDepth, then return a vector
- * with the given path. Otherwise, returns the result of collectComposed().
- *
- * This is useful for preventing the optimizer from unintentionally creating a very deep tree which
- * causes stack-overflow on a recursive traversal.
- */
-std::vector<ABT::reference_type> collectComposedBounded(const ABT& n, size_t maxDepth);
-
-/**
- * De-compose a path and an input to an EvalFilter into sequence of Filter nodes. If we have a path
- * with a prefix of PathGet's followed by a series of nested PathComposeM, then split into two or
- * more filter nodes at the composition and retain the prefix for each. The result is a tree of
- * chained filter nodes. We return an empty result if we have less than "minDepth" sub-tress which
- * are composed. If "minDepth" = 1, then we are guaranteed to return a result, which will consist of
- * a single Filter node.
- *
- * If the number of compositions exceeds "maxDepth" then we return the a single FilterNode
- * consisting of an EvalFilter over the original path and input.
- *
- * TODO: SERVER-73744. Consolidate usages in a new optimizer phase.
- */
-constexpr size_t kMaxPathConjunctionDecomposition = 20;
-boost::optional<ABT> decomposeToFilterNodes(const ABT& input,
- const ABT& path,
- const ABT& pathInput,
- size_t minDepth,
- size_t maxDepth = kMaxPathConjunctionDecomposition);
-
-/**
- * Returns true if the path represented by 'node' is of the form PathGet "field" PathId
- */
-bool isSimplePath(const ABT& node);
-
-template <class Element = PathComposeM>
-inline void maybeComposePath(ABT& composition, ABT child) {
- if (child.is<PathIdentity>()) {
- return;
- }
- if (composition.is<PathIdentity>()) {
- composition = std::move(child);
- return;
- }
-
- composition = make<Element>(std::move(composition), std::move(child));
-}
-
-/**
- * Creates a balanced tree of composition elements over the input vector which it modifies in place.
- * In the end at most one element remains in the vector.
- */
-template <class Element = PathComposeM>
-inline void maybeComposePaths(ABTVector& paths) {
- while (paths.size() > 1) {
- const size_t half = paths.size() / 2;
- for (size_t i = 0; i < half; i++) {
- maybeComposePath<Element>(paths.at(i), std::move(paths.back()));
- paths.pop_back();
- }
- }
-}
-
-/**
* Used to access and manipulate the child of a unary node.
*/
template <class NodeType>
@@ -273,42 +203,6 @@ CollationSplitResult splitCollationSpec(const boost::optional<ProjectionName>& r
const ProjectionNameSet& leftProjections,
const ProjectionNameSet& rightProjections);
-/**
- * Appends a path to another path. Performs the append at PathIdentity elements.
- */
-class PathAppender {
-public:
- PathAppender(ABT suffix) : _suffix(std::move(suffix)) {}
-
- void transport(ABT& n, const PathIdentity& node) {
- n = _suffix;
- }
-
- template <typename T, typename... Ts>
- void transport(ABT& /*n*/, const T& /*node*/, Ts&&...) {
- // noop
- }
-
- /**
- * Concatenate 'prefix' and 'suffix' by modifying 'prefix' in place.
- */
- static void appendInPlace(ABT& prefix, ABT suffix) {
- PathAppender instance{std::move(suffix)};
- algebra::transport<true>(prefix, instance);
- }
-
- /**
- * Return the concatenation of 'prefix' and 'suffix'.
- */
- [[nodiscard]] static ABT append(ABT prefix, ABT suffix) {
- appendInPlace(prefix, std::move(suffix));
- return prefix;
- }
-
-private:
- ABT _suffix;
-};
-
struct PartialSchemaReqConversion {
PartialSchemaReqConversion(PartialSchemaRequirements reqMap);
PartialSchemaReqConversion(ABT bound);
@@ -346,14 +240,6 @@ boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq(
const ABT& expr, bool isFilterContext, const PathToIntervalFn& pathToInterval);
/**
- * Given a path and a MultikeynessTrie describing the path's input,
- * removes any Traverse nodes that we know will never encounter an array.
- *
- * Returns true if any changes were made to the ABT.
- */
-bool simplifyTraverseNonArray(ABT& path, const MultikeynessTrie& multikeynessTrie);
-
-/**
* Given a set of non-multikey paths, remove redundant Traverse elements from paths in a Partial
* Schema Requirement structure. Returns true if we have an empty result after simplification.
*/
@@ -363,11 +249,6 @@ bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName,
const ConstFoldFn& constFold);
/**
- * Check if a path contains a Traverse element.
- */
-bool checkPathContainsTraverse(const ABT& path);
-
-/**
* Try to check whether the predicate 'lhs' is a subset of 'rhs'.
*
* True means 'lhs' is contained in 'rhs': every document that matches
@@ -532,10 +413,5 @@ PhysPlanBuilder lowerEqPrefixes(PrefixId& prefixId,
CEType indexCE,
CEType scanGroupCE);
-/**
- * This helper checks to see if we have a PathTraverse + PathId at the end of the path.
- */
-bool pathEndsInTraverse(const optimizer::ABT& path);
-
bool hasProperIntervals(const PartialSchemaRequirements& reqMap);
} // namespace mongo::optimizer