diff options
-rw-r--r-- | jstests/core/timeseries/timeseries_index_use.js | 3 | ||||
-rw-r--r-- | src/mongo/db/exec/timeseries/bucket_spec.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_algo.cpp | 265 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_algo.h | 103 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_algo_test.cpp | 63 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_expr.h | 13 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_path.h | 61 | ||||
-rw-r--r-- | src/mongo/db/pipeline/change_stream_rewrite_helpers.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression.cpp | 15 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression.h | 30 | ||||
-rw-r--r-- | src/mongo/db/query/expression_walker.h | 2 | ||||
-rw-r--r-- | src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp | 10 |
12 files changed, 444 insertions, 130 deletions
diff --git a/jstests/core/timeseries/timeseries_index_use.js b/jstests/core/timeseries/timeseries_index_use.js index ebcea9d8e6b..dda7471dd5a 100644 --- a/jstests/core/timeseries/timeseries_index_use.js +++ b/jstests/core/timeseries/timeseries_index_use.js @@ -403,8 +403,7 @@ const generateTest = (useHint) => { {}, collation); - /*********************************** Tests $expr predicates - * *********************************/ + /*********************************** Tests $expr predicates *******************************/ resetCollections(); assert.commandWorked(insert(coll, [ {_id: 0, [timeFieldName]: ISODate('1990-01-01 00:00:00.000Z'), [metaFieldName]: 2}, diff --git a/src/mongo/db/exec/timeseries/bucket_spec.cpp b/src/mongo/db/exec/timeseries/bucket_spec.cpp index 9ef6f425833..31a79ed7565 100644 --- a/src/mongo/db/exec/timeseries/bucket_spec.cpp +++ b/src/mongo/db/exec/timeseries/bucket_spec.cpp @@ -686,11 +686,10 @@ BucketSpec::BucketPredicate BucketSpec::createPredicatesOnBucketLevelField( if (!includeMetaField) return handleIneligible(policy, matchExpr, "cannot handle an excluded meta field"); - if (expression::hasOnlyRenameableMatchExpressionChildren(*matchExpr)) { - auto looseResult = matchExpr->clone(); - expression::applyRenamesToExpression( - looseResult.get(), + if (auto looseResult = expression::copyExpressionAndApplyRenames( + matchExpr, {{bucketSpec.metaField().value(), timeseries::kBucketMetaFieldName.toString()}}); + looseResult) { auto tightResult = looseResult->clone(); return {std::move(looseResult), std::move(tightResult)}; } else { diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp index d44c8b88b58..cca638c3ab5 100644 --- a/src/mongo/db/matcher/expression_algo.cpp +++ b/src/mongo/db/matcher/expression_algo.cpp @@ -27,13 +27,11 @@ * it in the license file. */ - -#include "mongo/platform/basic.h" +#include "mongo/db/matcher/expression_algo.h" #include "mongo/base/checked_cast.h" #include "mongo/bson/unordered_fields_bsonobj_comparator.h" #include "mongo/db/matcher/expression.h" -#include "mongo/db/matcher/expression_algo.h" #include "mongo/db/matcher/expression_array.h" #include "mongo/db/matcher/expression_expr.h" #include "mongo/db/matcher/expression_geo.h" @@ -377,12 +375,20 @@ unique_ptr<MatchExpression> createNorOfNodes(std::vector<unique_ptr<MatchExpress std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitMatchExpressionByFunction( unique_ptr<MatchExpression> expr, const OrderedPathSet& fields, + const StringMap<std::string>& renames, + expression::Renameables& renameables, expression::ShouldSplitExprFunc shouldSplitOut) { - if (shouldSplitOut(*expr, fields)) { + if (shouldSplitOut(*expr, fields, renames, renameables)) { // 'expr' satisfies our split condition and can be completely split out. return {std::move(expr), nullptr}; } + // At this point, the content of 'renameables' is no longer applicable because we chose not to + // proceed with the wholesale extraction of 'expr', or we try to find portion of 'expr' that can + // be split out by recursing down. In either case, we want to restart our renamable analysis and + // reset the state. + renameables.clear(); + if (expr->getCategory() != MatchExpression::MatchCategory::kLogical) { // 'expr' is a leaf and cannot be split out. return {nullptr, std::move(expr)}; @@ -395,13 +401,17 @@ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitMatchEx case MatchExpression::AND: { auto andExpr = checked_cast<AndMatchExpression*>(expr.get()); for (size_t i = 0; i < andExpr->numChildren(); i++) { + expression::Renameables childRenameables; auto children = splitMatchExpressionByFunction( - andExpr->releaseChild(i), fields, shouldSplitOut); + andExpr->releaseChild(i), fields, renames, childRenameables, shouldSplitOut); invariant(children.first || children.second); if (children.first) { splitOut.push_back(std::move(children.first)); + // Accumulate the renameable expressions from the children. + renameables.insert( + renameables.end(), childRenameables.begin(), childRenameables.end()); } if (children.second) { remaining.push_back(std::move(children.second)); @@ -420,9 +430,13 @@ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitMatchEx // is equal to 1. auto norExpr = checked_cast<NorMatchExpression*>(expr.get()); for (size_t i = 0; i < norExpr->numChildren(); i++) { + expression::Renameables childRenameables; auto child = norExpr->releaseChild(i); - if (shouldSplitOut(*child, fields)) { + if (shouldSplitOut(*child, fields, renames, childRenameables)) { splitOut.push_back(std::move(child)); + // Accumulate the renameable expressions from the children. + renameables.insert( + renameables.end(), childRenameables.begin(), childRenameables.end()); } else { remaining.push_back(std::move(child)); } @@ -776,21 +790,115 @@ bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs) { return false; } -bool hasOnlyRenameableMatchExpressionChildren(const MatchExpression& expr) { +// Type requirements for the hashOnlyRenameableMatchExpressionChildrenImpl() & isIndependentOfImpl() +// & isOnlyDependentOnImpl() functions +template <bool IsMutable, typename T> +using MaybeMutablePtr = typename std::conditional<IsMutable, T*, const T*>::type; + +// const MatchExpression& should be passed with no 'renameables' argument to traverse the expression +// tree in read-only mode. +template <typename E, typename... Args> +concept ConstTraverseMatchExpression = requires(E&& expr, Args&&... args) { + sizeof...(Args) == 0 && std::is_same_v<const MatchExpression&, E>; +}; + +// MatchExpression& should be passed with a single 'renameables' argument to traverse the expression +// tree in read-write mode. +template <typename E, typename... Args> +constexpr bool shouldCollectRenameables = std::is_same_v<MatchExpression&, E> && + sizeof...(Args) == 1 && (std::is_same_v<Renameables&, Args> && ...); + +// Traversing the expression tree in read-write mode is same as the 'shouldCollectRenameables'. +template <typename E, typename... Args> +concept MutableTraverseMatchExpression = shouldCollectRenameables<E, Args...>; + +// We traverse the expression tree in either read-only mode or read-write mode. +template <typename E, typename... Args> +requires ConstTraverseMatchExpression<E, Args...> || MutableTraverseMatchExpression<E, Args...> +bool hasOnlyRenameableMatchExpressionChildrenImpl(E&& expr, + const StringMap<std::string>& renames, + Args&&... renameables) { + constexpr bool mutating = shouldCollectRenameables<E, Args...>; + if (expr.matchType() == MatchExpression::MatchType::EXPRESSION) { + if constexpr (mutating) { + auto exprExpr = checked_cast<MaybeMutablePtr<mutating, ExprMatchExpression>>(&expr); + if (renames.size() > 0 && exprExpr->hasRenameablePath(renames)) { + // The second element is ignored for $expr. + (renameables.emplace_back(exprExpr, ""_sd), ...); + } + } + return true; - } else if (expr.getCategory() == MatchExpression::MatchCategory::kOther) { + } + + if (expr.getCategory() == MatchExpression::MatchCategory::kOther) { + if constexpr (mutating) { + (renameables.clear(), ...); + } return false; - } else if (expr.getCategory() == MatchExpression::MatchCategory::kLogical) { - for (size_t i = 0; i < expr.numChildren(); i++) { - if (!hasOnlyRenameableMatchExpressionChildren(*expr.getChild(i))) { - return false; + } + + if (expr.getCategory() == MatchExpression::MatchCategory::kArrayMatching || + expr.getCategory() == MatchExpression::MatchCategory::kLeaf) { + auto pathExpr = checked_cast<MaybeMutablePtr<mutating, PathMatchExpression>>(&expr); + if (renames.size() == 0 || !pathExpr->optPath()) { + return true; + } + + // Cannot proceed to dependency or independence checks if any attempted rename would fail. + auto&& [wouldSucceed, optNewPath] = pathExpr->wouldRenameSucceed(renames); + if (!wouldSucceed) { + if constexpr (mutating) { + (renameables.clear(), ...); } + return false; + } + + if constexpr (mutating) { + if (optNewPath) { + (renameables.emplace_back(pathExpr, *optNewPath), ...); + } + } + + return true; + } + + tassert(7585300, + "Expression category must be logical at this point", + expr.getCategory() == MatchExpression::MatchCategory::kLogical); + for (size_t i = 0; i < expr.numChildren(); ++i) { + bool hasOnlyRenameables = [&] { + if constexpr (mutating) { + return (hasOnlyRenameableMatchExpressionChildrenImpl( + *(expr.getChild(i)), renames, std::forward<Args>(renameables)), + ...); + } else { + return hasOnlyRenameableMatchExpressionChildrenImpl(*(expr.getChild(i)), renames); + } + }(); + if (!hasOnlyRenameables) { + if constexpr (mutating) { + (renameables.clear(), ...); + } + return false; } } + return true; } +bool hasOnlyRenameableMatchExpressionChildren(MatchExpression& expr, + const StringMap<std::string>& renames, + Renameables& renameables) { + return hasOnlyRenameableMatchExpressionChildrenImpl(expr, renames, renameables); +} + +bool hasOnlyRenameableMatchExpressionChildren(const MatchExpression& expr, + const StringMap<std::string>& renames) { + return hasOnlyRenameableMatchExpressionChildrenImpl(expr, renames); +} + bool containsDependency(const OrderedPathSet& testSet, const OrderedPathSet& prefixCandidates) { if (testSet.empty()) { return false; @@ -853,10 +961,27 @@ bool areIndependent(const OrderedPathSet& pathSet1, const OrderedPathSet& pathSe return !containsDependency(pathSet1, pathSet2) && !containsDependency(pathSet2, pathSet1); } -bool isIndependentOf(const MatchExpression& expr, const OrderedPathSet& pathSet) { +template <typename E, typename... Args> +requires ConstTraverseMatchExpression<E, Args...> || MutableTraverseMatchExpression<E, Args...> +bool isIndependentOfImpl(E&& expr, + const OrderedPathSet& pathSet, + const StringMap<std::string>& renames, + Args&&... renameables) { + constexpr bool mutating = shouldCollectRenameables<E, Args...>; + // Any expression types that do not have renaming implemented cannot have their independence // evaluated here. See applyRenamesToExpression(). - if (!hasOnlyRenameableMatchExpressionChildren(expr)) { + bool hasOnlyRenameables = [&] { + if constexpr (mutating) { + return (hasOnlyRenameableMatchExpressionChildrenImpl( + expr, renames, std::forward<Args>(renameables)), + ...); + } else { + return hasOnlyRenameableMatchExpressionChildrenImpl(expr, renames); + } + }(); + + if (!hasOnlyRenameables) { return false; } @@ -869,10 +994,42 @@ bool isIndependentOf(const MatchExpression& expr, const OrderedPathSet& pathSet) return areIndependent(pathSet, depsTracker.fields); } -bool isOnlyDependentOn(const MatchExpression& expr, const OrderedPathSet& pathSet) { +bool isIndependentOf(MatchExpression& expr, + const OrderedPathSet& pathSet, + const StringMap<std::string>& renames, + Renameables& renameables) { + return isIndependentOfImpl(expr, pathSet, renames, renameables); +} + +bool isIndependentOfConst(const MatchExpression& expr, + const OrderedPathSet& pathSet, + const StringMap<std::string>& renames) { + return isIndependentOfImpl(expr, pathSet, renames); +} + +template <typename E, typename... Args> +requires ConstTraverseMatchExpression<E, Args...> || MutableTraverseMatchExpression<E, Args...> +bool isOnlyDependentOnImpl(E&& expr, + const OrderedPathSet& pathSet, + const StringMap<std::string>& renames, + Args&&... renameables) { + constexpr bool mutating = shouldCollectRenameables<E, Args...>; + // Any expression types that do not have renaming implemented cannot have their independence // evaluated here. See applyRenamesToExpression(). - if (!hasOnlyRenameableMatchExpressionChildren(expr)) { + bool hasOnlyRenameables = [&] { + if constexpr (mutating) { + return (hasOnlyRenameableMatchExpressionChildrenImpl( + expr, renames, std::forward<Args>(renameables)), + ...); + } else { + return hasOnlyRenameableMatchExpressionChildrenImpl(expr, renames); + } + }(); + + // Any expression types that do not have renaming implemented cannot have their independence + // evaluated here. See applyRenamesToExpression(). + if (!hasOnlyRenameables) { return false; } @@ -896,57 +1053,57 @@ bool isOnlyDependentOn(const MatchExpression& expr, const OrderedPathSet& pathSe DepsTracker::simplifyDependencies(pathsDepsCopy, DepsTracker::TruncateToRootLevel::no); } +bool isOnlyDependentOn(MatchExpression& expr, + const OrderedPathSet& pathSet, + const StringMap<std::string>& renames, + Renameables& renameables) { + return isOnlyDependentOnImpl(expr, pathSet, renames, renameables); +} + +bool isOnlyDependentOnConst(const MatchExpression& expr, + const OrderedPathSet& pathSet, + const StringMap<std::string>& renames) { + return isOnlyDependentOnImpl(expr, pathSet, renames); +} + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitMatchExpressionBy( unique_ptr<MatchExpression> expr, const OrderedPathSet& fields, const StringMap<std::string>& renames, ShouldSplitExprFunc func /*= isIndependentOf */) { - auto splitExpr = splitMatchExpressionByFunction(expr->clone(), fields, func); - if (splitExpr.first) { - // If we get attemptedButFailedRenames == true, then it means we could not apply renames - // though there's sub-path match. In such a case, returns the original expression as the - // residual expression so that the match is not mistakenly swapped with the previous stage. - // Otherwise, the unrenamed $match would be swapped with the previous stage. - // - // TODO SERVER-74298 Remove if clause and just call applyRenamesToExpression(). - if (auto attemptedButFailedRenames = - applyRenamesToExpression(splitExpr.first.get(), renames); - attemptedButFailedRenames) { - return {nullptr, std::move(expr)}; - } + Renameables renameables; + auto splitExpr = + splitMatchExpressionByFunction(std::move(expr), fields, renames, renameables, func); + if (splitExpr.first && !renames.empty()) { + applyRenamesToExpression(renames, &renameables); } return splitExpr; } -// TODO SERVER-74298 Remove the return value. -// As soon as we find the first attempted but failed rename, we cancel the match expression tree -// traversal because the caller would return the original match expression. -bool applyRenamesToExpression(MatchExpression* expr, const StringMap<std::string>& renames) { - if (expr->matchType() == MatchExpression::MatchType::EXPRESSION) { - ExprMatchExpression* exprExpr = checked_cast<ExprMatchExpression*>(expr); - exprExpr->applyRename(renames); - return false; - } - - if (expr->getCategory() == MatchExpression::MatchCategory::kOther) { - return false; - } - - if (expr->getCategory() == MatchExpression::MatchCategory::kArrayMatching || - expr->getCategory() == MatchExpression::MatchCategory::kLeaf) { - auto* pathExpr = checked_cast<PathMatchExpression*>(expr); - if (pathExpr->applyRename(renames)) { - return true; +void applyRenamesToExpression(const StringMap<std::string>& renames, + const Renameables* renameables) { + tassert(7585301, "Invalid argument", renameables); + for (auto&& [matchExpr, newPath] : *renameables) { + if (stdx::holds_alternative<PathMatchExpression*>(matchExpr)) { + // PathMatchExpression. + stdx::get<PathMatchExpression*>(matchExpr)->setPath(newPath); + } else { + // ExprMatchExpression. + stdx::get<ExprMatchExpression*>(matchExpr)->applyRename(renames); } } +} - for (size_t i = 0; i < expr->numChildren(); ++i) { - if (applyRenamesToExpression(expr->getChild(i), renames)) { - return true; - } +std::unique_ptr<MatchExpression> copyExpressionAndApplyRenames( + const MatchExpression* expr, const StringMap<std::string>& renames) { + Renameables renameables; + if (auto exprCopy = expr->clone(); + hasOnlyRenameableMatchExpressionChildren(*exprCopy, renames, renameables)) { + applyRenamesToExpression(renames, &renameables); + return exprCopy; + } else { + return nullptr; } - - return false; } void mapOver(MatchExpression* expr, NodeTraversalFunc func, std::string path) { diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h index 2d2b483c244..0576ed04a3f 100644 --- a/src/mongo/db/matcher/expression_algo.h +++ b/src/mongo/db/matcher/expression_algo.h @@ -39,7 +39,9 @@ namespace mongo { +class ExprMatchExpression; class MatchExpression; +class PathMatchExpression; struct DepsTracker; namespace expression { @@ -53,10 +55,33 @@ using NodeTraversalFunc = std::function<void(MatchExpression*, std::string)>; */ bool hasExistencePredicateOnPath(const MatchExpression& expr, StringData path); +using PathOrExprMatchExpression = stdx::variant<PathMatchExpression*, ExprMatchExpression*>; +using Renameables = std::vector<std::pair<PathOrExprMatchExpression, std::string>>; + +/** + * Checks if 'expr' has any subexpression which does not have renaming implemented or has renaming + * implemented but may fail to rename for any one of 'renames'. If there's any such subexpression, + * we should not proceed with renaming. + */ +bool hasOnlyRenameableMatchExpressionChildren(const MatchExpression& expr, + const StringMap<std::string>& renames); + /** - * Checks if 'expr' has any children which do not have renaming implemented. + * Checks if 'expr' has any subexpression which does not have renaming implemented or has renaming + * implemented but may fail to rename for any one of 'renames'. If there's any such subexpression, + * we should not proceed with renaming. + * + * This function also fills out 'renameables' with the renameable subexpressions. For + * PathMatchExpression, the new path is returned in the second element of the pair. For + * ExprMatchExpression, the second element should be ignored and renames must be applied based on + * the full 'renames' map. + * + * Note: The 'renameables' is filled out while traversing the tree and so, designated as an output + * parameter as an optimization to avoid traversing the tree again unnecessarily. */ -bool hasOnlyRenameableMatchExpressionChildren(const MatchExpression& expr); +bool hasOnlyRenameableMatchExpressionChildren(MatchExpression& expr, + const StringMap<std::string>& renames, + Renameables& renameables); /** * Returns true if the documents matched by 'lhs' are a subset of the documents matched by @@ -122,14 +147,40 @@ bool containsOverlappingPaths(const OrderedPathSet& testSet); bool containsEmptyPaths(const OrderedPathSet& testSet); /** - * Determine if 'expr' is reliant upon any path from 'pathSet'. + * Determines if 'expr' is reliant upon any path from 'pathSet' and can be renamed by 'renames'. */ -bool isIndependentOf(const MatchExpression& expr, const OrderedPathSet& pathSet); +bool isIndependentOfConst(const MatchExpression& expr, + const OrderedPathSet& pathSet, + const StringMap<std::string>& renames = {}); /** - * Determine if 'expr' is reliant only upon paths from 'pathSet'. + * Determines if 'expr' is reliant upon any path from 'pathSet' and can be renamed by 'renames'. + * + * Note: For a description of the expected value returned in the 'renameables' output parameter, see + * the documentation for the 'hasOnlyRenameableMatchExpressionChildren()' function. */ -bool isOnlyDependentOn(const MatchExpression& expr, const OrderedPathSet& pathSet); +bool isIndependentOf(MatchExpression& expr, + const OrderedPathSet& pathSet, + const StringMap<std::string>& renames, + Renameables& renameables); + +/** + * Determines if 'expr' is reliant only upon paths from 'pathSet' and can be renamed by 'renames'. + */ +bool isOnlyDependentOnConst(const MatchExpression& expr, + const OrderedPathSet& pathSet, + const StringMap<std::string>& renames = {}); + +/** + * Determines if 'expr' is reliant only upon paths from 'pathSet' and can be renamed by 'renames'. + * + * Note: For a description of the expected value returned in the 'renameables' output parameter, see + * the documentation for the 'hasOnlyRenameableMatchExpressionChildren()' function. + */ +bool isOnlyDependentOn(MatchExpression& expr, + const OrderedPathSet& pathSet, + const StringMap<std::string>& renames, + Renameables& renameables); /** * Returns whether the path represented by 'first' is an prefix of the path represented by 'second'. @@ -156,7 +207,8 @@ bool bidirectionalPathPrefixOf(StringData first, StringData second); */ void mapOver(MatchExpression* expr, NodeTraversalFunc func, std::string path = ""); -using ShouldSplitExprFunc = std::function<bool(const MatchExpression&, const OrderedPathSet&)>; +using ShouldSplitExprFunc = std::function<bool( + MatchExpression&, const OrderedPathSet&, const StringMap<std::string>&, Renameables&)>; /** * Attempt to split 'expr' into two MatchExpressions according to 'func'. 'func' describes the @@ -176,12 +228,6 @@ using ShouldSplitExprFunc = std::function<bool(const MatchExpression&, const Ord * original match expression is {old: {$gt: 3}} and 'renames' contains the mapping "old" => "new". * The returned exprLeft value will be {new: {$gt: 3}}, provided that "old" is not in 'fields'. * - * If the previous stage is a simple rename, 'fields' should be empty and 'renames' are attempted - * but due to the limitation of renaming algorithm, we may fail to rename, when we return the - * original expression as residualExpr. - * - * TODO SERVER-74298 Remove the above comment after the ticket is done. - * * Never returns {nullptr, nullptr}. */ std::pair<std::unique_ptr<MatchExpression>, std::unique_ptr<MatchExpression>> @@ -191,21 +237,32 @@ splitMatchExpressionBy(std::unique_ptr<MatchExpression> expr, ShouldSplitExprFunc func = isIndependentOf); /** - * Applies the renames specified in 'renames' to 'expr'. 'renames' maps from path names in 'expr' - * to the new values of those paths. For example, suppose the original match expression is + * Applies the renames specified in 'renames' & 'renameables'. 'renames' maps from path names in + * 'expr' to the new values of those paths. For example, suppose the original match expression is * {old: {$gt: 3}} and 'renames' contains the mapping "old" => "new". At the end, 'expr' will be * {new: {$gt: 3}}. * - * The caller should make sure that `expr` is renamable as a whole. - * - * Returns whether there's any attempted but failed to rename. This case can happen when path - * component is part of sub-fields. For example, expr = {x: {$eq: {y: 3}}} and renames = {{"x.y", - * "a.b"}}. We should be able to rename 'x' and 'y' to 'a' and 'b' respectively but due to the - * current limitation of renaming algorithm, we cannot rename such match expressions. + * In order to do an in-place renaming of the match expression tree, the caller should first call + * 'hasOnlyRenameableMatchExpressionChildren()' and then call this function if it returns true, + * passing through the resulting 'renameables'. * - * TODO SERVER-74298 The return value might be necessary any more after the ticket is done. + * Note: To enforce the above precondition, the caller should pass in the output of the call to + * hasOnlyRenameableMatchExpressionChildren() as the 'renameables' argument. To avoid passing empty + * vector for 'renameables' like applyRenamesToExpression(expr, {}, {}), the parameter is defined as + * a pointer. + */ +void applyRenamesToExpression(const StringMap<std::string>& renames, + const Renameables* renameables); + +/** + * Copies the 'expr' and applies the renames specified in 'renames' to the copy of 'expr' and + * returns the renamed copy of 'expr' if renaming is successful. Otherwise, returns nullptr. + * 'renames' maps from path names in 'expr' to the new values of those paths. For example, suppose + * the original match expression is {old: {$gt: 3}} and 'renames' contains the mapping "old" => + * "new". The returned expression will be {new: {$gt: 3}}. */ -bool applyRenamesToExpression(MatchExpression* expr, const StringMap<std::string>& renames); +std::unique_ptr<MatchExpression> copyExpressionAndApplyRenames( + const MatchExpression* expr, const StringMap<std::string>& renames); /** * Split a MatchExpression into two parts: diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp index f2032fcdfd8..eabded3763b 100644 --- a/src/mongo/db/matcher/expression_algo_test.cpp +++ b/src/mongo/db/matcher/expression_algo_test.cpp @@ -826,8 +826,8 @@ TEST(IsIndependent, AndIsIndependentOnlyIfChildrenAre) { ASSERT_OK(status.getStatus()); unique_ptr<MatchExpression> expr = std::move(status.getValue()); - ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"b"})); - ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"c"})); + ASSERT_FALSE(expression::isIndependentOfConst(*expr.get(), {"b"})); + ASSERT_TRUE(expression::isIndependentOfConst(*expr.get(), {"c"})); } TEST(IsIndependent, ElemMatchIsIndependent) { @@ -838,9 +838,9 @@ TEST(IsIndependent, ElemMatchIsIndependent) { ASSERT_OK(status.getStatus()); unique_ptr<MatchExpression> expr = std::move(status.getValue()); - ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"x"})); - ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"x.y"})); - ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"y"})); + ASSERT_FALSE(expression::isIndependentOfConst(*expr.get(), {"x"})); + ASSERT_FALSE(expression::isIndependentOfConst(*expr.get(), {"x.y"})); + ASSERT_TRUE(expression::isIndependentOfConst(*expr.get(), {"y"})); } TEST(IsIndependent, NorIsIndependentOnlyIfChildrenAre) { @@ -851,8 +851,8 @@ TEST(IsIndependent, NorIsIndependentOnlyIfChildrenAre) { ASSERT_OK(status.getStatus()); unique_ptr<MatchExpression> expr = std::move(status.getValue()); - ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"b"})); - ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"c"})); + ASSERT_FALSE(expression::isIndependentOfConst(*expr.get(), {"b"})); + ASSERT_TRUE(expression::isIndependentOfConst(*expr.get(), {"c"})); } TEST(IsIndependent, NotIsIndependentOnlyIfChildrenAre) { @@ -863,8 +863,8 @@ TEST(IsIndependent, NotIsIndependentOnlyIfChildrenAre) { ASSERT_OK(status.getStatus()); unique_ptr<MatchExpression> expr = std::move(status.getValue()); - ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"b"})); - ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a"})); + ASSERT_TRUE(expression::isIndependentOfConst(*expr.get(), {"b"})); + ASSERT_FALSE(expression::isIndependentOfConst(*expr.get(), {"a"})); } TEST(IsIndependent, OrIsIndependentOnlyIfChildrenAre) { @@ -875,8 +875,8 @@ TEST(IsIndependent, OrIsIndependentOnlyIfChildrenAre) { ASSERT_OK(status.getStatus()); unique_ptr<MatchExpression> expr = std::move(status.getValue()); - ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a"})); - ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"c"})); + ASSERT_FALSE(expression::isIndependentOfConst(*expr.get(), {"a"})); + ASSERT_TRUE(expression::isIndependentOfConst(*expr.get(), {"c"})); } TEST(IsIndependent, AndWithDottedFieldPathsIsNotIndependent) { @@ -887,8 +887,8 @@ TEST(IsIndependent, AndWithDottedFieldPathsIsNotIndependent) { ASSERT_OK(status.getStatus()); unique_ptr<MatchExpression> expr = std::move(status.getValue()); - ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.b.c"})); - ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.b"})); + ASSERT_FALSE(expression::isIndependentOfConst(*expr.get(), {"a.b.c"})); + ASSERT_FALSE(expression::isIndependentOfConst(*expr.get(), {"a.b"})); } TEST(IsIndependent, BallIsIndependentOfBalloon) { @@ -899,9 +899,9 @@ TEST(IsIndependent, BallIsIndependentOfBalloon) { ASSERT_OK(status.getStatus()); unique_ptr<MatchExpression> expr = std::move(status.getValue()); - ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"a.balloon"})); - ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"a.b"})); - ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.ball.c"})); + ASSERT_TRUE(expression::isIndependentOfConst(*expr.get(), {"a.balloon"})); + ASSERT_TRUE(expression::isIndependentOfConst(*expr.get(), {"a.b"})); + ASSERT_FALSE(expression::isIndependentOfConst(*expr.get(), {"a.ball.c"})); } // This is a descriptive test to ensure that until renames are implemented for these expressions, @@ -921,8 +921,8 @@ TEST(IsIndependent, NonRenameableExpressionIsNotIndependent) { auto matchExpression = std::move(swMatchExpression.getValue()); // Both of these should be true once renames are implemented. - ASSERT_FALSE(expression::isIndependentOf(*matchExpression.get(), {"c"})); - ASSERT_FALSE(expression::isOnlyDependentOn(*matchExpression.get(), {"a", "b"})); + ASSERT_FALSE(expression::isIndependentOfConst(*matchExpression.get(), {"c"})); + ASSERT_FALSE(expression::isOnlyDependentOnConst(*matchExpression.get(), {"a", "b"})); } } @@ -932,7 +932,7 @@ TEST(IsIndependent, EmptyDependencySetsPassIsOnlyDependentOn) { auto swMatchExpression = MatchExpressionParser::parse(matchPredicate, std::move(expCtx)); ASSERT_OK(swMatchExpression.getStatus()); auto matchExpression = std::move(swMatchExpression.getValue()); - ASSERT_TRUE(expression::isOnlyDependentOn(*matchExpression.get(), {})); + ASSERT_TRUE(expression::isOnlyDependentOnConst(*matchExpression.get(), {})); } TEST(ContainsOverlappingPaths, Basics) { @@ -1830,7 +1830,10 @@ TEST(SplitMatchExpression, ShouldSplitOutAndRenameJsonSchemaPatternByIsOnlyDepen ASSERT_TRUE(splitOutExpr.get()); // 'splitOutExpr' must be same as the expression after renaming 'a' to 'meta'. - expression::applyRenamesToExpression(originalExprCopy.get(), {{"a", "meta"}}); + expression::Renameables renameables; + ASSERT_TRUE( + expression::isOnlyDependentOn(*originalExprCopy, {"a"}, {{"a", "meta"}}, renameables)); + expression::applyRenamesToExpression({{"a", "meta"}}, &renameables); ASSERT_BSONOBJ_EQ(splitOutExpr->serialize(), originalExprCopy->serialize()); ASSERT_FALSE(residualExpr.get()); @@ -1918,9 +1921,10 @@ TEST(ApplyRenamesToExpression, ShouldApplyBasicRenamesForAMatchWithExpr) { ASSERT_OK(matcher.getStatus()); StringMap<std::string> renames{{"a", "d"}, {"c", "e"}, {"x", "y"}}; - expression::applyRenamesToExpression(matcher.getValue().get(), renames); + auto renamedExpr = expression::copyExpressionAndApplyRenames(matcher.getValue().get(), renames); + ASSERT_TRUE(renamedExpr); - ASSERT_BSONOBJ_EQ(matcher.getValue()->serialize(), fromjson("{$expr: {$eq: ['$d.b', '$e']}}")); + ASSERT_BSONOBJ_EQ(renamedExpr->serialize(), fromjson("{$expr: {$eq: ['$d.b', '$e']}}")); } TEST(ApplyRenamesToExpression, ShouldApplyDottedRenamesForAMatchWithExpr) { @@ -1930,9 +1934,10 @@ TEST(ApplyRenamesToExpression, ShouldApplyDottedRenamesForAMatchWithExpr) { ASSERT_OK(matcher.getStatus()); StringMap<std::string> renames{{"a.b.c", "x"}, {"d.e", "y"}}; - expression::applyRenamesToExpression(matcher.getValue().get(), renames); + auto renamedExpr = expression::copyExpressionAndApplyRenames(matcher.getValue().get(), renames); + ASSERT_TRUE(renamedExpr); - ASSERT_BSONOBJ_EQ(matcher.getValue()->serialize(), fromjson("{$expr: {$lt: ['$x', '$y.f']}}")); + ASSERT_BSONOBJ_EQ(renamedExpr->serialize(), fromjson("{$expr: {$lt: ['$x', '$y.f']}}")); } TEST(ApplyRenamesToExpression, ShouldApplyDottedRenamesForAMatchWithNestedExpr) { @@ -1943,10 +1948,11 @@ TEST(ApplyRenamesToExpression, ShouldApplyDottedRenamesForAMatchWithNestedExpr) ASSERT_OK(matcher.getStatus()); StringMap<std::string> renames{{"a", "x.y"}, {"d.e", "y"}, {"c", "q.r"}}; - expression::applyRenamesToExpression(matcher.getValue().get(), renames); + auto renamedExpr = expression::copyExpressionAndApplyRenames(matcher.getValue().get(), renames); + ASSERT_TRUE(renamedExpr); ASSERT_BSONOBJ_EQ( - matcher.getValue()->serialize(), + renamedExpr->serialize(), fromjson( "{$and: [{$expr: {$eq: ['$x.y.b.c', '$q.r']}}, {$expr: {$lt: ['$y.f', '$x.y']}}]}")); } @@ -1958,10 +1964,11 @@ TEST(ApplyRenamesToExpression, ShouldNotApplyRenamesForAMatchWithExprWithNoField ASSERT_OK(matcher.getStatus()); StringMap<std::string> renames{{"a", "x.y"}, {"d.e", "y"}, {"c", "q.r"}}; - expression::applyRenamesToExpression(matcher.getValue().get(), renames); + auto renamedExpr = expression::copyExpressionAndApplyRenames(matcher.getValue().get(), renames); + ASSERT_TRUE(renamedExpr); ASSERT_BSONOBJ_EQ( - matcher.getValue()->serialize(), + renamedExpr->serialize(), fromjson("{$expr: {$concat: [{$const: 'a'}, {$const: 'b'}, {$const: 'c'}]}}")); } diff --git a/src/mongo/db/matcher/expression_expr.h b/src/mongo/db/matcher/expression_expr.h index 0f21242b316..41285aaf072 100644 --- a/src/mongo/db/matcher/expression_expr.h +++ b/src/mongo/db/matcher/expression_expr.h @@ -38,6 +38,7 @@ #include "mongo/db/pipeline/expression.h" #include "mongo/db/pipeline/expression_context.h" #include "mongo/db/pipeline/expression_walker.h" +#include "mongo/db/query/expression_walker.h" #include "mongo/util/assert_util.h" namespace mongo { @@ -136,6 +137,18 @@ public: expression_walker::walk<Expression>(_expression.get(), &substituteWalker); } + bool hasRenameablePath(const StringMap<std::string>& renameList) const { + bool hasRenameablePath = false; + FieldPathVisitor visitor([&](const ExpressionFieldPath* expr) { + hasRenameablePath = + hasRenameablePath || expr->isRenameableByAnyPrefixNameIn(renameList); + }); + stage_builder::ExpressionWalker walker( + &visitor, nullptr /*inVisitor*/, nullptr /*postVisitor*/); + expression_walker::walk(_expression.get(), &walker); + return hasRenameablePath; + } + private: ExpressionOptimizerFunc getOptimizer() const final; diff --git a/src/mongo/db/matcher/expression_path.h b/src/mongo/db/matcher/expression_path.h index 7bed80b751a..291cc750c99 100644 --- a/src/mongo/db/matcher/expression_path.h +++ b/src/mongo/db/matcher/expression_path.h @@ -105,20 +105,52 @@ public: * element) and the path components that should replace the renamed prefix (as the second * element). * - * Returns whether there is any attempted but failed to rename. This case can happen when any - * renamed path component is part of sub-fields. For example, expr = {x: {$eq: {y: 3}}} and - * renames = {{"x.y", "a.b"}}. We should be able to rename 'x' and 'y' to 'a' and 'b' - * respectively but due to the current limitation of the algorithm, we cannot rename such match - * expressions. + * Returns whether renames will always succeed if any rename is applicable. See + * wouldRenameSucceed() for more details. * * TODO SERVER-74298 As soon as we implement SERVER-74298, the return value might not be * necessary any more. */ - bool applyRename(const StringMap<std::string>& renameList) { - if (!_elementPath) { + [[nodiscard]] bool applyRename(const StringMap<std::string>& renameList) { + if (!_elementPath || renameList.size() == 0) { + return true; + } + + if (auto&& [isRenamable, optRewrittenPath] = wouldRenameSucceed(renameList); !isRenamable) { return false; + } else if (optRewrittenPath) { + setPath(*optRewrittenPath); } + return true; + } + + /** + * Returns a pair of bool and boost::optional<StringData>. + * + * - The bool indicates whether renames will always succeed if any rename is applicable. No + * applicable renames is considered as a successful rename and returns true with the second + * element of the pair is boost::none. This function can return false when a renamed path + * component descends into an $elemMatch or an object literal. For examples, + * + * expr = {x: {$eq: {y: 3}}} and renames = {{"x.y", "a.b"}}. We should be able to rename 'x' + * and 'y' to 'a' and 'b' respectively but due to the current limitation of the algorithm, we + * cannot rename such match expressions. + * + * Another similar example is expr = {x: {$elemMatch: {$eq: {y: 3}}}} and renames = {{"x.y", + * "a.b"}}. + + * - The boost::optional<StringData> is the rewritten path iff one rename is applicable. The + * rewritten path is the path after applying the only applicable rename in 'renameList'. If no + * rename is applicable, the rewritten path is boost::none. + * + * TODO SERVER-74298 As soon as we implement SERVER-74298, this separate function may not be + * necessary any more and can be combined into applyRenames(). + */ + std::pair<bool, boost::optional<std::string>> wouldRenameSucceed( + const StringMap<std::string>& renameList) const { + invariant(_elementPath); + size_t renamesFound = 0u; std::string rewrittenPath; for (const auto& rename : renameList) { @@ -141,9 +173,14 @@ public: ++renamesFound; } else if (pathFieldRef.isPrefixOf(prefixToRename)) { - // TODO SERVER-74298 Implement renaming by each path component instead of - // incrementing 'attemptedButFailedRenames'. - return true; + // TODO SERVER-74298 Implement renaming by each path component instead of returning + // the pair of 'false' and boost::none. We can traverse subexpressions with the + // remaining path suffix of 'prefixToRename' to see if we can rename each path + // component. Any subexpression would succeed with 'rewrittenPath' then this path + // component can be renamed. For example, assuming that 'pathFieldRef' == "a.b" and + // 'prefixToRename' == "a.b.c", we can recurse down to the subexpression with path + // "c" to see if we can rename it. If we can, we can rename this path too. + return {false, boost::none}; } } @@ -152,10 +189,10 @@ public: if (renamesFound == 1u) { // There is an applicable rename. Modify the path of this expression to use the new // name. - setPath(rewrittenPath); + return {true, rewrittenPath}; } - return false; + return {true, boost::none}; } void serialize(BSONObjBuilder* out, SerializationOptions opts) const override { diff --git a/src/mongo/db/pipeline/change_stream_rewrite_helpers.cpp b/src/mongo/db/pipeline/change_stream_rewrite_helpers.cpp index 1f13e8ed87f..877d3f18612 100644 --- a/src/mongo/db/pipeline/change_stream_rewrite_helpers.cpp +++ b/src/mongo/db/pipeline/change_stream_rewrite_helpers.cpp @@ -57,7 +57,7 @@ std::unique_ptr<PathMatchExpression> cloneWithSubstitution( const PathMatchExpression* predicate, const StringMap<std::string>& renameList) { auto clonedPred = std::unique_ptr<PathMatchExpression>( static_cast<PathMatchExpression*>(predicate->clone().release())); - clonedPred->applyRename(renameList); + tassert(7585302, "Failed to rename", clonedPred->applyRename(renameList)); return clonedPred; } boost::intrusive_ptr<ExpressionFieldPath> cloneWithSubstitution( diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp index 932c8404210..83fe6400e20 100644 --- a/src/mongo/db/pipeline/expression.cpp +++ b/src/mongo/db/pipeline/expression.cpp @@ -2649,6 +2649,21 @@ std::unique_ptr<Expression> ExpressionFieldPath::copyWithSubstitution( return nullptr; } +bool ExpressionFieldPath::isRenameableByAnyPrefixNameIn( + const StringMap<std::string>& renameList) const { + if (_variable != Variables::kRootId || _fieldPath.getPathLength() == 1) { + return false; + } + + FieldRef path(getFieldPathWithoutCurrentPrefix().fullPath()); + for (const auto& rename : renameList) { + if (FieldRef oldName(rename.first); oldName.isPrefixOfOrEqualTo(path)) { + return true; + } + } + return false; +} + monotonic::State ExpressionFieldPath::getMonotonicState(const FieldPath& sortedFieldPath) const { return getFieldPathWithoutCurrentPrefix() == sortedFieldPath ? monotonic::State::Increasing : monotonic::State::NonMonotonic; diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h index 297592a2cf7..75c4ecca30c 100644 --- a/src/mongo/db/pipeline/expression.h +++ b/src/mongo/db/pipeline/expression.h @@ -2030,6 +2030,12 @@ public: std::unique_ptr<Expression> copyWithSubstitution( const StringMap<std::string>& renameList) const; + /** + * Checks if any key of 'renameList' map is a prefix of this ExpressionFieldPath's path. It + * would mean that this ExpressionFieldPath is renameable by 'renameList' if so. + */ + bool isRenameableByAnyPrefixNameIn(const StringMap<std::string>& renameList) const; + void acceptVisitor(ExpressionMutableVisitor* visitor) final { return visitor->visit(this); } @@ -4288,6 +4294,30 @@ struct SubstituteFieldPathWalker { }; /** + * This visitor is used to visit only ExpressionFieldPath nodes in an expression tree and call 'fn' + * on them. + * + * Usage example: + * bool isFoo = false; + * FieldPathVisitor visitor([&](const ExpressionFieldPath* expr) { + * isFoo = isFoo || expr->isFoo(); + * }); + */ +template <typename F> +struct FieldPathVisitor : public SelectiveConstExpressionVisitorBase { + // To avoid overloaded-virtual warnings. + using SelectiveConstExpressionVisitorBase::visit; + + explicit FieldPathVisitor(const F& fn) : _fn(fn) {} + + void visit(const ExpressionFieldPath* expr) final { + _fn(expr); + } + + F _fn; +}; + +/** * $dateTrunc expression that maps a date to a lower bound of a bin of a certain size that the date * belongs to. It uses 2000-01-01T00:00:00.000 as a reference point. */ diff --git a/src/mongo/db/query/expression_walker.h b/src/mongo/db/query/expression_walker.h index 55b4ddf4054..67369f7d290 100644 --- a/src/mongo/db/query/expression_walker.h +++ b/src/mongo/db/query/expression_walker.h @@ -27,7 +27,7 @@ * it in the license file. */ -#include "mongo/platform/basic.h" +#pragma once #include "mongo/db/pipeline/expression_visitor.h" diff --git a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp index ac49912d3dc..03c3a9e6d73 100644 --- a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp +++ b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp @@ -480,11 +480,11 @@ bool doesBucketsIndexIncludeMeasurement(OperationContext* opCtx, statusWithFilter.isOK()); auto filter = std::move(statusWithFilter.getValue()); - if (!expression::isOnlyDependentOn(*filter, - {std::string{timeseries::kBucketMetaFieldName}, - controlMinTimeField, - controlMaxTimeField, - idField})) { + if (!expression::isOnlyDependentOnConst(*filter, + {std::string{timeseries::kBucketMetaFieldName}, + controlMinTimeField, + controlMaxTimeField, + idField})) { // Partial filter expression depends on a non-time, non-metadata field. return true; } |