diff options
author | Svilen Mihaylov <svilen.mihaylov@mongodb.com> | 2023-02-09 19:00:36 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-02-09 22:35:56 +0000 |
commit | 387bd9df945ade25b5955223f04db1917782d728 (patch) | |
tree | 8fb73c9a68e4be1155ff360c1a53e55e2d8dd6d0 | |
parent | 9866a24068c721f4ef99bd66bb659053cc6738ed (diff) | |
download | mongo-387bd9df945ade25b5955223f04db1917782d728.tar.gz |
SERVER-73821 [CQF] Split path utilities from utils.cpp
-rw-r--r-- | src/mongo/db/pipeline/abt/agg_expression_visitor.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/abt/canonical_query_translation.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/pipeline/abt/document_source_visitor.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/pipeline/abt/field_map_builder.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/abt/field_map_builder.h | 1 | ||||
-rw-r--r-- | src/mongo/db/pipeline/abt/match_expression_visitor.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/abt/utils.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/ce/histogram_estimator.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/node.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/rewrites/path.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/utils/path_utils.cpp | 324 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/utils/path_utils.h | 174 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/utils/utils.cpp | 304 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/utils/utils.h | 124 |
16 files changed, 514 insertions, 434 deletions
diff --git a/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp b/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp index be4820d64a2..49dbb571d3c 100644 --- a/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp +++ b/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp @@ -37,7 +37,7 @@ #include "mongo/db/pipeline/accumulator.h" #include "mongo/db/pipeline/accumulator_multi.h" #include "mongo/db/pipeline/expression_walker.h" -#include "mongo/db/query/optimizer/utils/utils.h" +#include "mongo/db/query/optimizer/utils/path_utils.h" namespace mongo::optimizer { diff --git a/src/mongo/db/pipeline/abt/canonical_query_translation.cpp b/src/mongo/db/pipeline/abt/canonical_query_translation.cpp index cd1af40041b..7f0be84bd6d 100644 --- a/src/mongo/db/pipeline/abt/canonical_query_translation.cpp +++ b/src/mongo/db/pipeline/abt/canonical_query_translation.cpp @@ -33,6 +33,7 @@ #include "mongo/db/pipeline/abt/collation_translation.h" #include "mongo/db/pipeline/abt/match_expression_visitor.h" #include "mongo/db/pipeline/abt/transformer_visitor.h" +#include "mongo/db/query/optimizer/utils/path_utils.h" namespace mongo::optimizer { diff --git a/src/mongo/db/pipeline/abt/document_source_visitor.cpp b/src/mongo/db/pipeline/abt/document_source_visitor.cpp index d109ee01eb6..2940bc9458b 100644 --- a/src/mongo/db/pipeline/abt/document_source_visitor.cpp +++ b/src/mongo/db/pipeline/abt/document_source_visitor.cpp @@ -38,11 +38,9 @@ #include "mongo/db/pipeline/document_source_bucket_auto.h" #include "mongo/db/pipeline/document_source_coll_stats.h" #include "mongo/db/pipeline/document_source_current_op.h" -#include "mongo/db/pipeline/document_source_cursor.h" #include "mongo/db/pipeline/document_source_exchange.h" #include "mongo/db/pipeline/document_source_facet.h" #include "mongo/db/pipeline/document_source_geo_near.h" -#include "mongo/db/pipeline/document_source_geo_near_cursor.h" #include "mongo/db/pipeline/document_source_graph_lookup.h" #include "mongo/db/pipeline/document_source_group.h" #include "mongo/db/pipeline/document_source_index_stats.h" @@ -62,7 +60,6 @@ #include "mongo/db/pipeline/document_source_plan_cache_stats.h" #include "mongo/db/pipeline/document_source_queue.h" #include "mongo/db/pipeline/document_source_redact.h" -#include "mongo/db/pipeline/document_source_replace_root.h" #include "mongo/db/pipeline/document_source_sample.h" #include "mongo/db/pipeline/document_source_sample_from_random_cursor.h" #include "mongo/db/pipeline/document_source_sequential_document_cache.h" @@ -76,7 +73,7 @@ #include "mongo/db/pipeline/visitors/document_source_visitor_registry_mongod.h" #include "mongo/db/pipeline/visitors/document_source_walker.h" #include "mongo/db/pipeline/visitors/transformer_interface_walker.h" -#include "mongo/s/query/document_source_merge_cursors.h" +#include "mongo/db/query/optimizer/utils/path_utils.h" #include "mongo/util/string_map.h" namespace mongo::optimizer { diff --git a/src/mongo/db/pipeline/abt/field_map_builder.cpp b/src/mongo/db/pipeline/abt/field_map_builder.cpp index 9fd23d34bba..55085817c92 100644 --- a/src/mongo/db/pipeline/abt/field_map_builder.cpp +++ b/src/mongo/db/pipeline/abt/field_map_builder.cpp @@ -28,6 +28,8 @@ */ #include "mongo/db/pipeline/abt/field_map_builder.h" +#include "mongo/db/query/optimizer/utils/path_utils.h" + namespace mongo::optimizer { diff --git a/src/mongo/db/pipeline/abt/field_map_builder.h b/src/mongo/db/pipeline/abt/field_map_builder.h index aee7d53f052..ba21193520e 100644 --- a/src/mongo/db/pipeline/abt/field_map_builder.h +++ b/src/mongo/db/pipeline/abt/field_map_builder.h @@ -31,7 +31,6 @@ #include "mongo/db/pipeline/field_path.h" #include "mongo/db/query/optimizer/node.h" -#include "mongo/db/query/optimizer/utils/utils.h" namespace mongo::optimizer { diff --git a/src/mongo/db/pipeline/abt/match_expression_visitor.cpp b/src/mongo/db/pipeline/abt/match_expression_visitor.cpp index fcd88189e19..85e92eac62f 100644 --- a/src/mongo/db/pipeline/abt/match_expression_visitor.cpp +++ b/src/mongo/db/pipeline/abt/match_expression_visitor.cpp @@ -63,7 +63,7 @@ #include "mongo/db/pipeline/abt/agg_expression_visitor.h" #include "mongo/db/pipeline/abt/expr_algebrizer_context.h" #include "mongo/db/pipeline/abt/utils.h" -#include "mongo/db/query/optimizer/utils/utils.h" +#include "mongo/db/query/optimizer/utils/path_utils.h" namespace mongo::optimizer { diff --git a/src/mongo/db/pipeline/abt/utils.cpp b/src/mongo/db/pipeline/abt/utils.cpp index 2051ad72c5b..a22424d3580 100644 --- a/src/mongo/db/pipeline/abt/utils.cpp +++ b/src/mongo/db/pipeline/abt/utils.cpp @@ -31,7 +31,7 @@ #include "mongo/db/exec/docval_to_sbeval.h" #include "mongo/db/exec/sbe/values/bson.h" -#include "mongo/db/query/optimizer/utils/utils.h" +#include "mongo/db/query/optimizer/utils/path_utils.h" namespace mongo::optimizer { diff --git a/src/mongo/db/query/ce/histogram_estimator.cpp b/src/mongo/db/query/ce/histogram_estimator.cpp index 67ed40f66d9..8f08eb2a065 100644 --- a/src/mongo/db/query/ce/histogram_estimator.cpp +++ b/src/mongo/db/query/ce/histogram_estimator.cpp @@ -41,6 +41,7 @@ #include "mongo/db/query/optimizer/utils/abt_hash.h" #include "mongo/db/query/optimizer/utils/ce_math.h" #include "mongo/db/query/optimizer/utils/memo_utils.h" +#include "mongo/db/query/optimizer/utils/path_utils.h" #include "mongo/logv2/log.h" diff --git a/src/mongo/db/query/optimizer/SConscript b/src/mongo/db/query/optimizer/SConscript index a73a71211b5..b7de866cf16 100644 --- a/src/mongo/db/query/optimizer/SConscript +++ b/src/mongo/db/query/optimizer/SConscript @@ -30,6 +30,7 @@ env.Library( "utils/abt_hash.cpp", "utils/ce_math.cpp", "utils/interval_utils.cpp", + "utils/path_utils.cpp", "utils/reftracker_utils.cpp", "utils/utils.cpp", ], diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp index 759f53b693e..01f2bc84c77 100644 --- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp +++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp @@ -31,6 +31,7 @@ #include "mongo/db/query/optimizer/cascades/rewriter_rules.h" #include "mongo/db/query/optimizer/reference_tracker.h" +#include "mongo/db/query/optimizer/utils/path_utils.h" #include "mongo/db/query/optimizer/utils/reftracker_utils.h" diff --git a/src/mongo/db/query/optimizer/node.cpp b/src/mongo/db/query/optimizer/node.cpp index 133af61cd2d..09b43c619c8 100644 --- a/src/mongo/db/query/optimizer/node.cpp +++ b/src/mongo/db/query/optimizer/node.cpp @@ -31,6 +31,7 @@ #include <stack> #include "mongo/db/query/optimizer/node.h" +#include "mongo/db/query/optimizer/utils/path_utils.h" #include "mongo/db/query/optimizer/utils/utils.h" namespace mongo::optimizer { diff --git a/src/mongo/db/query/optimizer/rewrites/path.cpp b/src/mongo/db/query/optimizer/rewrites/path.cpp index b158fbc9a50..d3183bccd33 100644 --- a/src/mongo/db/query/optimizer/rewrites/path.cpp +++ b/src/mongo/db/query/optimizer/rewrites/path.cpp @@ -28,7 +28,8 @@ */ #include "mongo/db/query/optimizer/rewrites/path.h" -#include "mongo/db/query/optimizer/utils/utils.h" +#include "mongo/db/query/optimizer/utils/path_utils.h" + namespace mongo::optimizer { ABT::reference_type PathFusion::follow(ABT::reference_type n) { diff --git a/src/mongo/db/query/optimizer/utils/path_utils.cpp b/src/mongo/db/query/optimizer/utils/path_utils.cpp new file mode 100644 index 00000000000..8a97e15fb1c --- /dev/null +++ b/src/mongo/db/query/optimizer/utils/path_utils.cpp @@ -0,0 +1,324 @@ +/** + * Copyright (C) 2023-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/optimizer/utils/path_utils.h" + + +namespace mongo::optimizer { + +std::vector<ABT::reference_type> collectComposed(const ABT& n) { + if (auto comp = n.cast<PathComposeM>(); comp) { + auto lhs = collectComposed(comp->getPath1()); + auto rhs = collectComposed(comp->getPath2()); + lhs.insert(lhs.end(), rhs.begin(), rhs.end()); + return lhs; + } + return {n.ref()}; +} + +// Helper function to count the size of a nested conjunction. +size_t countComposed(const ABT& n) { + if (auto comp = n.cast<PathComposeM>()) { + return countComposed(comp->getPath1()) + countComposed(comp->getPath2()); + } + return 1; +} + +std::vector<ABT::reference_type> collectComposedBounded(const ABT& n, size_t maxDepth) { + if (countComposed(n) > maxDepth) { + return {n.ref()}; + } + return collectComposed(n); +} + +static ABT appendFieldPath(const FieldPathType& fieldPath, ABT input) { + for (size_t index = fieldPath.size(); index-- > 0;) { + input = make<PathGet>(fieldPath.at(index), std::move(input)); + } + return input; +} + +boost::optional<ABT> decomposeToFilterNodes(const ABT& input, + const ABT& path, + const ABT& pathInput, + const size_t minDepth, + const size_t maxDepth) { + ABT::reference_type subPathRef = path.ref(); + FieldPathType fieldPath; + while (const auto newPath = subPathRef.cast<PathGet>()) { + fieldPath.push_back(newPath->name()); + subPathRef = newPath->getPath().ref(); + } + + ABT subPath = subPathRef; + if (auto composition = collectComposedBounded(subPath, maxDepth); + composition.size() >= minDepth) { + // Remove the path composition and insert two filter nodes. + ABT result = input; + for (const auto& element : composition) { + result = + make<FilterNode>(make<EvalFilter>(appendFieldPath(fieldPath, element), pathInput), + std::move(result)); + } + return result; + } + + return boost::none; +} + +bool isSimplePath(const ABT& node) { + if (auto getPtr = node.cast<PathGet>(); + getPtr != nullptr && getPtr->getPath().is<PathIdentity>()) { + return true; + } + return false; +} + +/** + * Removes Traverse nodes from a single path, using MultikeynessTrie which tells us + * which child paths are never applied to an array. + */ +class MultikeynessSimplifier { +public: + bool operator()(ABT&, PathIdentity&, const MultikeynessTrie&, bool /*skippedParentTraverse*/) { + // No simplifications apply here. + return false; + } + + bool operator()(ABT& path, + PathGet& get, + const MultikeynessTrie& trie, + bool skippedParentTraverse) { + if (auto it = trie.children.find(get.name()); it != trie.children.end()) { + return get.getPath().visit(*this, it->second, false /*skippedParentTraverse*/); + } else { + return false; + } + } + + bool operator()(ABT& path, + PathTraverse& traverse, + const MultikeynessTrie& trie, + bool skippedParentTraverse) { + tassert(6859603, + "Unexpected maxDepth for Traverse in MultikeynessSimplifier", + traverse.getMaxDepth() == PathTraverse::kSingleLevel); + + if (!trie.isMultiKey) { + // This path is never applied to an array: we can remove any number of Traverse nodes, + // of any maxDepth. + path = std::exchange(traverse.getPath(), make<Blackhole>()); + // The parent can't have been a Traverse that we skipped, because we would have + // removed it, because !trie.isMultiKey. + invariant(!skippedParentTraverse); + path.visit(*this, trie, false /*skippedParentTraverse*/); + return true; + } else if (traverse.getMaxDepth() == PathTraverse::kSingleLevel && !skippedParentTraverse) { + // This path is possibly multikey, so we can't remove any Traverse nodes. + // But each edge in the trie represents a 'Traverse [1] Get [a]', so we can + // skip a single Traverse [1] node. + return traverse.getPath().visit(*this, trie, true /*skippedParentTraverse*/); + } else { + // We have no information about multikeyness of the child path. + return false; + } + } + + bool operator()(ABT& path, + PathLambda& pathLambda, + const MultikeynessTrie& trie, + bool skippedParentTraverse) { + // Look for PathLambda Lambda [tmp] UnaryOp [Not] EvalFilter <path> Variable [tmp], + // and simplify <path>. This works because 'tmp' is the same variable name in both places, + // so <path> is applied to the same input as the PathLambda. (And the 'trie' tells us + // which parts of that input are not arrays.) + + // In the future we may want to generalize this to skip over other expressions besides Not, + // as long as the Lambda and EvalFilter are connected by a variable. + + if (auto* lambda = pathLambda.getLambda().cast<LambdaAbstraction>()) { + if (auto* unary = lambda->getBody().cast<UnaryOp>(); + unary && unary->op() == Operations::Not) { + if (auto* evalFilter = unary->getChild().cast<EvalFilter>()) { + if (auto* variable = evalFilter->getInput().cast<Variable>(); + variable && variable->name() == lambda->varName()) { + return evalFilter->getPath().visit( + *this, trie, false /*skippedParentTraverse*/); + } + } + } + } + return false; + } + + bool operator()(ABT& path, + PathComposeM& compose, + const MultikeynessTrie& trie, + bool skippedParentTraverse) { + const bool simplified1 = compose.getPath1().visit(*this, trie, skippedParentTraverse); + const bool simplified2 = compose.getPath2().visit(*this, trie, skippedParentTraverse); + return simplified1 || simplified2; + } + + template <typename T, typename... Ts> + bool operator()(ABT& n, T& /*node*/, Ts&&...) { + // Don't optimize a node we don't recognize. + return false; + + // Some other cases to consider: + // - Remove PathArr for non-multikey paths. + // - Descend into disjunction. + // - Descend into PathLambda and simplify expressions, especially Not and EvalFilter. + } + + static bool simplify(ABT& path, const MultikeynessTrie& trie) { + MultikeynessSimplifier instance; + return path.visit(instance, trie, false /*skippedParentTraverse*/); + } +}; + +bool simplifyTraverseNonArray(ABT& path, const MultikeynessTrie& multikeynessTrie) { + return MultikeynessSimplifier::simplify(path, multikeynessTrie); +} + +class IndexPathFusor { +public: + /** + * 'n' - The complete index path being compared to, can be modified if needed. + * 'node' - Same as 'n' but cast to a specific type by the caller in order to invoke the + * correct operator. + * 'other' - The query path, of which the index may satisfy a prefix. + */ + IndexFusionResult operator()(const ABT& n, const PathGet& node, const ABT& other) { + if (auto otherGet = other.cast<PathGet>(); + otherGet != nullptr && otherGet->name() == node.name()) { + if (auto otherChildTraverse = otherGet->getPath().cast<PathTraverse>(); + otherChildTraverse != nullptr && !node.getPath().is<PathTraverse>()) { + // If a query path has a Traverse, but the index path doesn't, the query can + // still be evaluated by this index. Skip the Traverse node, and continue matching. + // This works because we know the Traverse will never be applied to an array, + // so 'Traverse [anything] p == p'. + auto result = node.getPath().visit(*this, otherChildTraverse->getPath()); + result._numTraversesSkipped++; + return result; + } else { + return node.getPath().visit(*this, otherGet->getPath()); + } + } + return {}; + } + + IndexFusionResult operator()(const ABT& n, const PathTraverse& node, const ABT& other) { + if (auto otherTraverse = other.cast<PathTraverse>(); + otherTraverse != nullptr && otherTraverse->getMaxDepth() == node.getMaxDepth()) { + auto result = node.getPath().visit(*this, otherTraverse->getPath()); + result._numTraversesFused++; + return result; + } + return {}; + } + + IndexFusionResult operator()(const ABT& n, const PathIdentity& node, const ABT& other) { + return {other.ref()}; + } + + template <typename T, typename... Ts> + IndexFusionResult operator()(const ABT& /*n*/, const T& /*node*/, Ts&&...) { + uasserted(6624152, "Unexpected node type"); + } +}; + +IndexFusionResult fuseIndexPath(const ABT& node, const ABT& candidatePrefix) { + IndexPathFusor instance; + return candidatePrefix.visit(instance, node); +} + +/** + * Check if an index path contains a Traverse element. + */ +class PathTraverseChecker { +public: + PathTraverseChecker() {} + + bool transport(const ABT& /*n*/, const PathTraverse& /*node*/, bool /*childResult*/) { + return true; + } + + bool transport(const ABT& /*n*/, const PathGet& /*node*/, bool childResult) { + return childResult; + } + + bool transport(const ABT& /*n*/, const PathIdentity& /*node*/) { + return false; + } + + template <typename T, typename... Ts> + bool transport(const ABT& /*n*/, const T& /*node*/, Ts&&...) { + uasserted(6624153, "Index paths only consist of Get, Traverse, and Id nodes."); + return false; + } + + bool check(const ABT& path) { + return algebra::transport<true>(path, *this); + } +}; + +bool checkPathContainsTraverse(const ABT& path) { + return PathTraverseChecker{}.check(path); +} + +/** + * Checks if a path ends in a Traverse + PathId. + */ +class PathEndsInTraverseId { +public: + bool transport(const optimizer::PathTraverse& node, bool childResult) { + return node.getPath().is<PathIdentity>() || childResult; + } + + bool transport(const optimizer::PathGet& /*node*/, bool childResult) { + return childResult; + } + + bool transport(const optimizer::PathIdentity& /*node*/) { + return false; + } + + template <typename T, typename... Ts> + bool transport(const T& node, Ts&&... /* args */) { + uasserted(6749500, "Unexpected node in transport to check if path is $elemMatch."); + } +}; + +bool pathEndsInTraverse(const optimizer::ABT& path) { + PathEndsInTraverseId t; + return optimizer::algebra::transport<false>(path, t); +} + +} // namespace mongo::optimizer diff --git a/src/mongo/db/query/optimizer/utils/path_utils.h b/src/mongo/db/query/optimizer/utils/path_utils.h new file mode 100644 index 00000000000..c5e33bf6e39 --- /dev/null +++ b/src/mongo/db/query/optimizer/utils/path_utils.h @@ -0,0 +1,174 @@ +/** + * Copyright (C) 2023-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/query/optimizer/defs.h" +#include "mongo/db/query/optimizer/node.h" + + +namespace mongo::optimizer { + +/** + * Returns a vector all paths nested under conjunctions (PathComposeM) in the given path. + * For example, PathComposeM(PathComposeM(Foo, Bar), Baz) returns [Foo, Bar, Baz]. + * If the given path is not a conjunction, returns a vector with the given path. + */ +std::vector<ABT::reference_type> collectComposed(const ABT& n); + +/** + * Like collectComposed() but bounded by a maximum number of composed paths. + * If the given path has more PathComposeM;s than specified by maxDepth, then return a vector + * with the given path. Otherwise, returns the result of collectComposed(). + * + * This is useful for preventing the optimizer from unintentionally creating a very deep tree which + * causes stack-overflow on a recursive traversal. + */ +std::vector<ABT::reference_type> collectComposedBounded(const ABT& n, size_t maxDepth); + +/** + * De-compose a path and an input to an EvalFilter into sequence of Filter nodes. If we have a path + * with a prefix of PathGet's followed by a series of nested PathComposeM, then split into two or + * more filter nodes at the composition and retain the prefix for each. The result is a tree of + * chained filter nodes. We return an empty result if we have less than "minDepth" sub-tress which + * are composed. If "minDepth" = 1, then we are guaranteed to return a result, which will consist of + * a single Filter node. + * + * If the number of compositions exceeds "maxDepth" then we return the a single FilterNode + * consisting of an EvalFilter over the original path and input. + * + * TODO: SERVER-73744. Consolidate usages in a new optimizer phase. + */ +constexpr size_t kMaxPathConjunctionDecomposition = 20; +boost::optional<ABT> decomposeToFilterNodes(const ABT& input, + const ABT& path, + const ABT& pathInput, + size_t minDepth, + size_t maxDepth = kMaxPathConjunctionDecomposition); + +/** + * Returns true if the path represented by 'node' is of the form PathGet "field" PathId + */ +bool isSimplePath(const ABT& node); + +template <class Element = PathComposeM> +inline void maybeComposePath(ABT& composition, ABT child) { + if (child.is<PathIdentity>()) { + return; + } + if (composition.is<PathIdentity>()) { + composition = std::move(child); + return; + } + + composition = make<Element>(std::move(composition), std::move(child)); +} + +/** + * Creates a balanced tree of composition elements over the input vector which it modifies in place. + * In the end at most one element remains in the vector. + */ +template <class Element = PathComposeM> +inline void maybeComposePaths(ABTVector& paths) { + while (paths.size() > 1) { + const size_t half = paths.size() / 2; + for (size_t i = 0; i < half; i++) { + maybeComposePath<Element>(paths.at(i), std::move(paths.back())); + paths.pop_back(); + } + } +} + +/** + * Appends a path to another path. Performs the append at PathIdentity elements. + */ +class PathAppender { +public: + PathAppender(ABT suffix) : _suffix(std::move(suffix)) {} + + void transport(ABT& n, const PathIdentity& node) { + n = _suffix; + } + + template <typename T, typename... Ts> + void transport(ABT& /*n*/, const T& /*node*/, Ts&&...) { + // noop + } + + /** + * Concatenate 'prefix' and 'suffix' by modifying 'prefix' in place. + */ + static void appendInPlace(ABT& prefix, ABT suffix) { + PathAppender instance{std::move(suffix)}; + algebra::transport<true>(prefix, instance); + } + + /** + * Return the concatenation of 'prefix' and 'suffix'. + */ + [[nodiscard]] static ABT append(ABT prefix, ABT suffix) { + appendInPlace(prefix, std::move(suffix)); + return prefix; + } + +private: + ABT _suffix; +}; + +/** + * Given a path and a MultikeynessTrie describing the path's input, + * removes any Traverse nodes that we know will never encounter an array. + * + * Returns true if any changes were made to the ABT. + */ +bool simplifyTraverseNonArray(ABT& path, const MultikeynessTrie& multikeynessTrie); + +/** + * Fuses an index path and a query path to determine a residual path to apply over the index + * results. Checks if one index path is a prefix of another. Considers only Get, Traverse, and Id. + * Return the suffix that doesn't match. + */ +struct IndexFusionResult { + boost::optional<ABT::reference_type> _suffix; + size_t _numTraversesSkipped = 0; + size_t _numTraversesFused = 0; +}; +IndexFusionResult fuseIndexPath(const ABT& node, const ABT& candidatePrefix); + +/** + * Check if a path contains a Traverse element. + */ +bool checkPathContainsTraverse(const ABT& path); + +/** + * This helper checks to see if we have a PathTraverse + PathId at the end of the path. + */ +bool pathEndsInTraverse(const optimizer::ABT& path); + +} // namespace mongo::optimizer diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp index f6de52ecc7c..3c12f562616 100644 --- a/src/mongo/db/query/optimizer/utils/utils.cpp +++ b/src/mongo/db/query/optimizer/utils/utils.cpp @@ -36,77 +36,11 @@ #include "mongo/db/query/optimizer/syntax/syntax.h" #include "mongo/db/query/optimizer/utils/ce_math.h" #include "mongo/db/query/optimizer/utils/interval_utils.h" +#include "mongo/db/query/optimizer/utils/path_utils.h" #include "mongo/db/storage/storage_parameters_gen.h" -namespace mongo::optimizer { - -std::vector<ABT::reference_type> collectComposed(const ABT& n) { - if (auto comp = n.cast<PathComposeM>(); comp) { - auto lhs = collectComposed(comp->getPath1()); - auto rhs = collectComposed(comp->getPath2()); - lhs.insert(lhs.end(), rhs.begin(), rhs.end()); - return lhs; - } - return {n.ref()}; -} - -// Helper function to count the size of a nested conjunction. -size_t countComposed(const ABT& n) { - if (auto comp = n.cast<PathComposeM>()) { - return countComposed(comp->getPath1()) + countComposed(comp->getPath2()); - } - return 1; -} - -std::vector<ABT::reference_type> collectComposedBounded(const ABT& n, size_t maxDepth) { - if (countComposed(n) > maxDepth) { - return {n.ref()}; - } - return collectComposed(n); -} - -static ABT appendFieldPath(const FieldPathType& fieldPath, ABT input) { - for (size_t index = fieldPath.size(); index-- > 0;) { - input = make<PathGet>(fieldPath.at(index), std::move(input)); - } - return input; -} - -boost::optional<ABT> decomposeToFilterNodes(const ABT& input, - const ABT& path, - const ABT& pathInput, - const size_t minDepth, - const size_t maxDepth) { - ABT::reference_type subPathRef = path.ref(); - FieldPathType fieldPath; - while (const auto newPath = subPathRef.cast<PathGet>()) { - fieldPath.push_back(newPath->name()); - subPathRef = newPath->getPath().ref(); - } - - ABT subPath = subPathRef; - if (auto composition = collectComposedBounded(subPath, maxDepth); - composition.size() >= minDepth) { - // Remove the path composition and insert two filter nodes. - ABT result = input; - for (const auto& element : composition) { - result = - make<FilterNode>(make<EvalFilter>(appendFieldPath(fieldPath, element), pathInput), - std::move(result)); - } - return result; - } - - return boost::none; -} -bool isSimplePath(const ABT& node) { - if (auto getPtr = node.cast<PathGet>(); - getPtr != nullptr && getPtr->getPath().is<PathIdentity>()) { - return true; - } - return false; -} +namespace mongo::optimizer { ProjectionNameOrderedSet convertToOrderedSet(ProjectionNameSet unordered) { ProjectionNameOrderedSet ordered; @@ -751,147 +685,6 @@ boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq( return result; } -/** - * Check if an index path contains a Traverse element. - */ -class PathTraverseChecker { -public: - PathTraverseChecker() {} - - bool transport(const ABT& /*n*/, const PathTraverse& /*node*/, bool /*childResult*/) { - return true; - } - - bool transport(const ABT& /*n*/, const PathGet& /*node*/, bool childResult) { - return childResult; - } - - bool transport(const ABT& /*n*/, const PathIdentity& /*node*/) { - return false; - } - - template <typename T, typename... Ts> - bool transport(const ABT& /*n*/, const T& /*node*/, Ts&&...) { - uasserted(6624153, "Index paths only consist of Get, Traverse, and Id nodes."); - return false; - } - - bool check(const ABT& path) { - return algebra::transport<true>(path, *this); - } -}; - -bool checkPathContainsTraverse(const ABT& path) { - return PathTraverseChecker{}.check(path); -} - -/** - * Removes Traverse nodes from a single path, using MultikeynessTrie which tells us - * which child paths are never applied to an array. - */ -class MultikeynessSimplifier { -public: - bool operator()(ABT&, PathIdentity&, const MultikeynessTrie&, bool /*skippedParentTraverse*/) { - // No simplifications apply here. - return false; - } - - bool operator()(ABT& path, - PathGet& get, - const MultikeynessTrie& trie, - bool skippedParentTraverse) { - if (auto it = trie.children.find(get.name()); it != trie.children.end()) { - return get.getPath().visit(*this, it->second, false /*skippedParentTraverse*/); - } else { - return false; - } - } - - bool operator()(ABT& path, - PathTraverse& traverse, - const MultikeynessTrie& trie, - bool skippedParentTraverse) { - tassert(6859603, - "Unexpected maxDepth for Traverse in MultikeynessSimplifier", - traverse.getMaxDepth() == PathTraverse::kSingleLevel); - - if (!trie.isMultiKey) { - // This path is never applied to an array: we can remove any number of Traverse nodes, - // of any maxDepth. - path = std::exchange(traverse.getPath(), make<Blackhole>()); - // The parent can't have been a Traverse that we skipped, because we would have - // removed it, because !trie.isMultiKey. - invariant(!skippedParentTraverse); - path.visit(*this, trie, false /*skippedParentTraverse*/); - return true; - } else if (traverse.getMaxDepth() == PathTraverse::kSingleLevel && !skippedParentTraverse) { - // This path is possibly multikey, so we can't remove any Traverse nodes. - // But each edge in the trie represents a 'Traverse [1] Get [a]', so we can - // skip a single Traverse [1] node. - return traverse.getPath().visit(*this, trie, true /*skippedParentTraverse*/); - } else { - // We have no information about multikeyness of the child path. - return false; - } - } - - bool operator()(ABT& path, - PathLambda& pathLambda, - const MultikeynessTrie& trie, - bool skippedParentTraverse) { - // Look for PathLambda Lambda [tmp] UnaryOp [Not] EvalFilter <path> Variable [tmp], - // and simplify <path>. This works because 'tmp' is the same variable name in both places, - // so <path> is applied to the same input as the PathLambda. (And the 'trie' tells us - // which parts of that input are not arrays.) - - // In the future we may want to generalize this to skip over other expressions besides Not, - // as long as the Lambda and EvalFilter are connected by a variable. - - if (auto* lambda = pathLambda.getLambda().cast<LambdaAbstraction>()) { - if (auto* unary = lambda->getBody().cast<UnaryOp>(); - unary && unary->op() == Operations::Not) { - if (auto* evalFilter = unary->getChild().cast<EvalFilter>()) { - if (auto* variable = evalFilter->getInput().cast<Variable>(); - variable && variable->name() == lambda->varName()) { - return evalFilter->getPath().visit( - *this, trie, false /*skippedParentTraverse*/); - } - } - } - } - return false; - } - - bool operator()(ABT& path, - PathComposeM& compose, - const MultikeynessTrie& trie, - bool skippedParentTraverse) { - const bool simplified1 = compose.getPath1().visit(*this, trie, skippedParentTraverse); - const bool simplified2 = compose.getPath2().visit(*this, trie, skippedParentTraverse); - return simplified1 || simplified2; - } - - template <typename T, typename... Ts> - bool operator()(ABT& n, T& /*node*/, Ts&&...) { - // Don't optimize a node we don't recognize. - return false; - - // Some other cases to consider: - // - Remove PathArr for non-multikey paths. - // - Descend into disjunction. - // - Descend into PathLambda and simplify expressions, especially Not and EvalFilter. - } - - static bool simplify(ABT& path, const MultikeynessTrie& trie) { - MultikeynessSimplifier instance; - return path.visit(instance, trie, false /*skippedParentTraverse*/); - } -}; - -bool simplifyTraverseNonArray(ABT& path, const MultikeynessTrie& multikeynessTrie) { - return MultikeynessSimplifier::simplify(path, multikeynessTrie); -} - bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName, const MultikeynessTrie& multikeynessTrie, PartialSchemaRequirements& reqMap, @@ -1162,69 +955,6 @@ const ProjectionName& getExistingOrTempProjForFieldName(PrefixId& prefixId, } /** - * Fuses an index path and a query path to determine a residual path to apply over the index - * results. Checks if one index path is a prefix of another. Considers only Get, Traverse, and Id. - * Return the suffix that doesn't match. - */ -class IndexPathFusor { -public: - struct ResultType { - boost::optional<ABT::reference_type> _suffix; - size_t _numTraversesSkipped = 0; - size_t _numTraversesFused = 0; - }; - - /** - * 'n' - The complete index path being compared to, can be modified if needed. - * 'node' - Same as 'n' but cast to a specific type by the caller in order to invoke the - * correct operator. - * 'other' - The query path, of which the index may satisfy a prefix. - */ - ResultType operator()(const ABT& n, const PathGet& node, const ABT& other) { - if (auto otherGet = other.cast<PathGet>(); - otherGet != nullptr && otherGet->name() == node.name()) { - if (auto otherChildTraverse = otherGet->getPath().cast<PathTraverse>(); - otherChildTraverse != nullptr && !node.getPath().is<PathTraverse>()) { - // If a query path has a Traverse, but the index path doesn't, the query can - // still be evaluated by this index. Skip the Traverse node, and continue matching. - // This works because we know the Traverse will never be applied to an array, - // so 'Traverse [anything] p == p'. - auto result = node.getPath().visit(*this, otherChildTraverse->getPath()); - result._numTraversesSkipped++; - return result; - } else { - return node.getPath().visit(*this, otherGet->getPath()); - } - } - return {}; - } - - ResultType operator()(const ABT& n, const PathTraverse& node, const ABT& other) { - if (auto otherTraverse = other.cast<PathTraverse>(); - otherTraverse != nullptr && otherTraverse->getMaxDepth() == node.getMaxDepth()) { - auto result = node.getPath().visit(*this, otherTraverse->getPath()); - result._numTraversesFused++; - return result; - } - return {}; - } - - ResultType operator()(const ABT& n, const PathIdentity& node, const ABT& other) { - return {other.ref()}; - } - - template <typename T, typename... Ts> - ResultType operator()(const ABT& /*n*/, const T& /*node*/, Ts&&...) { - uasserted(6624152, "Unexpected node type"); - } - - static ResultType fuse(const ABT& node, const ABT& candidatePrefix) { - IndexPathFusor instance; - return candidatePrefix.visit(instance, node); - } -}; - -/** * Pad compound interval with supplied simple interval. */ static void extendCompoundInterval(const IndexCollationSpec& indexCollationSpec, @@ -1394,7 +1124,7 @@ static bool computeCandidateIndexEntry(PrefixId& prefixId, for (size_t indexField = 0; indexField < indexCollationSpec.size(); indexField++) { if (const auto fusedPath = - IndexPathFusor::fuse(queryKey._path, indexCollationSpec.at(indexField)._path); + fuseIndexPath(queryKey._path, indexCollationSpec.at(indexField)._path); fusedPath._suffix && (allowFuseTraverse[indexField] || fusedPath._numTraversesFused == 0)) { if (fusedPath._numTraversesFused > 0) { @@ -2612,34 +2342,6 @@ PhysPlanBuilder lowerEqPrefixes(PrefixId& prefixId, return lowerTransport.lower(eqPrefixes.at(eqPrefixIndex)._interval); } -/** - * Checks if a path ends in a Traverse + PathId. - */ -class PathEndsInTraverseId { -public: - bool transport(const optimizer::PathTraverse& node, bool childResult) { - return node.getPath().is<PathIdentity>() || childResult; - } - - bool transport(const optimizer::PathGet& /*node*/, bool childResult) { - return childResult; - } - - bool transport(const optimizer::PathIdentity& /*node*/) { - return false; - } - - template <typename T, typename... Ts> - bool transport(const T& node, Ts&&... /* args */) { - uasserted(6749500, "Unexpected node in transport to check if path is $elemMatch."); - } -}; - -bool pathEndsInTraverse(const optimizer::ABT& path) { - PathEndsInTraverseId t; - return optimizer::algebra::transport<false>(path, t); -} - bool hasProperIntervals(const PartialSchemaRequirements& reqMap) { // Compute if this node has any proper (not fully open) intervals. for (const auto& [key, req] : reqMap.conjuncts()) { diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h index 59ba2f30542..3d6e167580a 100644 --- a/src/mongo/db/query/optimizer/utils/utils.h +++ b/src/mongo/db/query/optimizer/utils/utils.h @@ -66,76 +66,6 @@ inline size_t computeHashSeq(const Args&... seq) { } /** - * Returns a vector all paths nested under conjunctions (PathComposeM) in the given path. - * For example, PathComposeM(PathComposeM(Foo, Bar), Baz) returns [Foo, Bar, Baz]. - * If the given path is not a conjunction, returns a vector with the given path. - */ -std::vector<ABT::reference_type> collectComposed(const ABT& n); - -/** - * Like collectComposed() but bounded by a maximum number of composed paths. - * If the given path has more PathComposeM;s than specified by maxDepth, then return a vector - * with the given path. Otherwise, returns the result of collectComposed(). - * - * This is useful for preventing the optimizer from unintentionally creating a very deep tree which - * causes stack-overflow on a recursive traversal. - */ -std::vector<ABT::reference_type> collectComposedBounded(const ABT& n, size_t maxDepth); - -/** - * De-compose a path and an input to an EvalFilter into sequence of Filter nodes. If we have a path - * with a prefix of PathGet's followed by a series of nested PathComposeM, then split into two or - * more filter nodes at the composition and retain the prefix for each. The result is a tree of - * chained filter nodes. We return an empty result if we have less than "minDepth" sub-tress which - * are composed. If "minDepth" = 1, then we are guaranteed to return a result, which will consist of - * a single Filter node. - * - * If the number of compositions exceeds "maxDepth" then we return the a single FilterNode - * consisting of an EvalFilter over the original path and input. - * - * TODO: SERVER-73744. Consolidate usages in a new optimizer phase. - */ -constexpr size_t kMaxPathConjunctionDecomposition = 20; -boost::optional<ABT> decomposeToFilterNodes(const ABT& input, - const ABT& path, - const ABT& pathInput, - size_t minDepth, - size_t maxDepth = kMaxPathConjunctionDecomposition); - -/** - * Returns true if the path represented by 'node' is of the form PathGet "field" PathId - */ -bool isSimplePath(const ABT& node); - -template <class Element = PathComposeM> -inline void maybeComposePath(ABT& composition, ABT child) { - if (child.is<PathIdentity>()) { - return; - } - if (composition.is<PathIdentity>()) { - composition = std::move(child); - return; - } - - composition = make<Element>(std::move(composition), std::move(child)); -} - -/** - * Creates a balanced tree of composition elements over the input vector which it modifies in place. - * In the end at most one element remains in the vector. - */ -template <class Element = PathComposeM> -inline void maybeComposePaths(ABTVector& paths) { - while (paths.size() > 1) { - const size_t half = paths.size() / 2; - for (size_t i = 0; i < half; i++) { - maybeComposePath<Element>(paths.at(i), std::move(paths.back())); - paths.pop_back(); - } - } -} - -/** * Used to access and manipulate the child of a unary node. */ template <class NodeType> @@ -273,42 +203,6 @@ CollationSplitResult splitCollationSpec(const boost::optional<ProjectionName>& r const ProjectionNameSet& leftProjections, const ProjectionNameSet& rightProjections); -/** - * Appends a path to another path. Performs the append at PathIdentity elements. - */ -class PathAppender { -public: - PathAppender(ABT suffix) : _suffix(std::move(suffix)) {} - - void transport(ABT& n, const PathIdentity& node) { - n = _suffix; - } - - template <typename T, typename... Ts> - void transport(ABT& /*n*/, const T& /*node*/, Ts&&...) { - // noop - } - - /** - * Concatenate 'prefix' and 'suffix' by modifying 'prefix' in place. - */ - static void appendInPlace(ABT& prefix, ABT suffix) { - PathAppender instance{std::move(suffix)}; - algebra::transport<true>(prefix, instance); - } - - /** - * Return the concatenation of 'prefix' and 'suffix'. - */ - [[nodiscard]] static ABT append(ABT prefix, ABT suffix) { - appendInPlace(prefix, std::move(suffix)); - return prefix; - } - -private: - ABT _suffix; -}; - struct PartialSchemaReqConversion { PartialSchemaReqConversion(PartialSchemaRequirements reqMap); PartialSchemaReqConversion(ABT bound); @@ -346,14 +240,6 @@ boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq( const ABT& expr, bool isFilterContext, const PathToIntervalFn& pathToInterval); /** - * Given a path and a MultikeynessTrie describing the path's input, - * removes any Traverse nodes that we know will never encounter an array. - * - * Returns true if any changes were made to the ABT. - */ -bool simplifyTraverseNonArray(ABT& path, const MultikeynessTrie& multikeynessTrie); - -/** * Given a set of non-multikey paths, remove redundant Traverse elements from paths in a Partial * Schema Requirement structure. Returns true if we have an empty result after simplification. */ @@ -363,11 +249,6 @@ bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName, const ConstFoldFn& constFold); /** - * Check if a path contains a Traverse element. - */ -bool checkPathContainsTraverse(const ABT& path); - -/** * Try to check whether the predicate 'lhs' is a subset of 'rhs'. * * True means 'lhs' is contained in 'rhs': every document that matches @@ -532,10 +413,5 @@ PhysPlanBuilder lowerEqPrefixes(PrefixId& prefixId, CEType indexCE, CEType scanGroupCE); -/** - * This helper checks to see if we have a PathTraverse + PathId at the end of the path. - */ -bool pathEndsInTraverse(const optimizer::ABT& path); - bool hasProperIntervals(const PartialSchemaRequirements& reqMap); } // namespace mongo::optimizer |