/** * Copyright (C) 2018-present MongoDB, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the Server Side Public License, version 1, * as published by MongoDB, Inc. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * Server Side Public License for more details. * * You should have received a copy of the Server Side Public License * along with this program. If not, see * . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the Server Side Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. */ #pragma once #include #include "mongo/base/status.h" #include "mongo/db/query/canonical_query.h" #include "mongo/db/query/index_entry.h" #include "mongo/db/query/index_tag.h" #include "mongo/db/query/plan_enumerator_explain_info.h" #include "mongo/db/query/query_knobs_gen.h" #include "mongo/stdx/unordered_map.h" namespace mongo { struct PlanEnumeratorParams { PlanEnumeratorParams() : maxSolutionsPerOr(internalQueryEnumerationMaxOrSolutions.load()), maxIntersectPerAnd(internalQueryEnumerationMaxIntersectPerAnd.load()), disableOrPushdown(disableMatchExpressionOptimization.shouldFail()) {} // Do we provide solutions that use more indices than the minimum required to provide // an indexed solution? bool intersect = false; // Do we enumerate children of an $or in a special order to prioritize solutions which have the // same assignment on each branch? bool enumerateOrChildrenLockstep = false; // Not owned here. MatchExpression* root; // Not owned here. const std::vector* indices; // How many plans are we willing to ouput from an OR? We currently consider // all possibly OR plans, which means the product of the number of possibilities // for each clause of the OR. This could grow disastrously large. size_t maxSolutionsPerOr; // How many intersect plans are we willing to output from an AND? Given that we pursue an // all-pairs approach, we could wind up creating a lot of enumeration possibilities for // certain inputs. size_t maxIntersectPerAnd; // Whether to disable OR-pushdown optimization. OR-pushdown assumes that the expression has been // simplified: for example, that single-child $or nodes are unwrapped. To avoid this, when // the 'disableMatchExpressionOptimization' failpoint is set, we also disable OR-pushdown. bool disableOrPushdown; }; /** * Provides elements from the power set of possible indices to use. Uses the available * predicate information to make better decisions about what indices are best. */ class PlanEnumerator { PlanEnumerator(const PlanEnumerator&) = delete; PlanEnumerator& operator=(const PlanEnumerator&) = delete; public: /** * Constructs an enumerator for the query specified in 'root' which is tagged with * RelevantTag(s). The index patterns mentioned in the tags are described by 'indices'. * * Does not take ownership of any arguments. They must outlive any calls to getNext(...). */ PlanEnumerator(const PlanEnumeratorParams& params); ~PlanEnumerator(); /** * Returns OK and performs a sanity check on the input parameters and prepares the * internal state so that getNext() can be called. Returns an error status with a * description if the sanity check failed. */ Status init(); /** * Outputs a possible plan. Leaves in the plan are tagged with an index to use. Returns a * MatchExpression representing a point in the query tree (which can be used to build a * QuerySolutionNode tree) or nullptr if no more plans will be outputted. While owned by the * caller, the MatchExpression returned points into data that is owned elsewhere. * * Nodes in 'tree' are tagged with indices that should be used to answer the tagged nodes. Only * nodes that have a field name (getCategory() != kLogical) will be tagged. * * The output tree is a clone identical to that used to initialize the enumerator, with tags * added in order to indicate index usage. */ std::unique_ptr getNext(); PlanEnumeratorExplainInfo _explainInfo; private: // // Memoization strategy // // Everything is really a size_t but it's far more readable to impose a type via typedef. // An ID we use to index into _memo. An entry in _memo is a NodeAssignment. typedef size_t MemoID; // An index in _indices. typedef size_t IndexID; // The position of a field in a possibly compound index. typedef size_t IndexPosition; /** * Represents the route that an outside predicate has taken during the PlanEnumerator's * recursive descent of the match expression tree. */ struct OutsidePredRoute { /** * Whether or not the route has traversed through an $elemMatch object node. This is needed * because it is not correct to push down a predicate through an $elemMatch object. */ bool traversedThroughElemMatchObj = false; /** * The route of the outside predicate. This starts at the indexed OR sibling of the * predicate. Each value in 'route' is the index of a child in an indexed OR. * * For example, if the MatchExpression tree is: * AND * / \ * {a: 5} OR * / \ * AND {e: 9} * / \ * {b: 6} OR * / \ * {c: 7} {d: 8} * * and the predicate is {a: 5}, then the route will be {0, 1} when the recursive descent * reaches {d: 8}. */ std::deque route; }; struct PrepMemoContext { PrepMemoContext() : elemMatchExpr(nullptr) {} MatchExpression* elemMatchExpr; // Maps from indexable predicates that can be pushed into the current node to the route // through ORs that they have taken to get to this node. stdx::unordered_map outsidePreds; }; /** * Traverses the match expression and generates the memo structure from it. * Returns true if the provided node uses an index, false otherwise. */ bool prepMemo(MatchExpression* node, PrepMemoContext context); /** * Traverses the memo structure and annotates the tree with IndexTags for the chosen * indices. */ void tagMemo(MemoID id); /** * Move to the next enumeration state. Each assignment stores its own enumeration state. * See the various ____Assignment classes below for details on enumeration state. * * Returns true if the memo subtree with root 'node' has no further enumeration states. In * this case, that subtree restarts its enumeration at the beginning state. This implies * that the parent of node should move to the next state. If 'node' is the root of the * tree, we are done with enumeration. * * The return of this function can be thought of like a 'carry' in addition. * * Returns false if the memo subtree has moved to the next state. */ bool nextMemo(MemoID id); /** * A short word on the memo structure. * * The PlanEnumerator is interested in matching predicates and indices. Predicates * are leaf nodes in the parse tree. {x:5}, {x: {$geoWithin:...}} are both predicates. * * Since the output of an OR is the union of its results, each of its children must be * indexed for the entire OR to be indexed. If each subtree of an OR is indexable, the * OR is as well. * * For an AND to be indexed, only one of its children must be indexed. AND is an * intersection of its children, so each of its children describes a superset of the * produced results. * * Leaf predicates are also given AND assignments, since they may index outside predicates that * have been pushed down through OR nodes. */ struct OrAssignment { OrAssignment() : counter(0) {} // Each child of an OR must be indexed for the OR to be indexed. When an OR moves to a // subsequent state it just asks all its children to move their states forward. // Must use all of subnodes. std::vector subnodes; // The number of OR states that we've enumerated so far. size_t counter; }; struct LockstepOrAssignment { struct PreferFirstSubNode { MemoID memoId; size_t iterationCount; boost::optional maxIterCount; }; std::vector subnodes; bool exhaustedLockstepIteration = false; size_t totalEnumerated = 0; /** * Returns true if 'totalEnumerated' matches the total number of expected plans for this * assignment. */ bool shouldResetBeforeProceeding(size_t totalEnumerated, size_t orLimit) const; /** * Returns true if each sub node is at the same iterationCount. */ bool allIdentical() const; }; // This is used by AndAssignment and is not an actual assignment. struct OneIndexAssignment { // 'preds[i]' is uses index 'index' at position 'positions[i]' std::vector preds; std::vector positions; IndexID index; // True if the bounds on 'index' for the leaf expressions in 'preds' can be intersected // and/or compounded, and false otherwise. If 'canCombineBounds' is set to false and // multiple predicates are assigned to the same position of a multikey index, then the // access planner should generate a self-intersection plan. bool canCombineBounds = true; // The expressions that should receive an OrPushdownTag when this assignment is made. std::vector> orPushdowns; }; struct AndEnumerableState { std::vector assignments; std::vector subnodesToIndex; }; struct AndAssignment { AndAssignment() : counter(0) {} std::vector choices; // We're on the counter-th member of state. size_t counter; }; struct ArrayAssignment { ArrayAssignment() : counter(0) {} std::vector subnodes; size_t counter; }; /** * Associates indices with predicates. */ struct NodeAssignment { std::unique_ptr orAssignment; std::unique_ptr lockstepOrAssignment; std::unique_ptr andAssignment; std::unique_ptr arrayAssignment; std::string toString() const; }; /** * Allocates a NodeAssignment and associates it with the provided 'expr'. * * The unique MemoID of the new assignment is outputted in '*id'. * The out parameter '*slot' points to the newly allocated NodeAssignment. */ void allocateAssignment(MatchExpression* expr, NodeAssignment** slot, MemoID* id); /** * Predicates inside $elemMatch's that are semantically "$and of $and" * predicates are not rewritten to the top-level during normalization. * However, we would like to make predicates inside $elemMatch available * for combining index bounds with the top-level $and predicates. * * This function deeply traverses $and and $elemMatch expressions of * the tree rooted at 'node', adding all preds that can use an index * to the output vector 'indexOut'. At the same time, $elemMatch * context information is stashed in the tags so that we don't lose * information due to flattening. * * Does not take ownership of arguments. * * Returns false if the AND cannot be indexed. Otherwise returns true. */ void getIndexedPreds(MatchExpression* node, PrepMemoContext context, std::vector* indexOut); /** * Recursively traverse 'node', with OR nodes as the base case. The OR nodes are not * explored--instead we call prepMemo() on the OR subnode, and add its assignment to the output. * Subnodes are "mandatory" if they *must* use an index (TEXT and GEO). * Returns a boolean indicating whether all mandatory subnodes can be indexed and returns false * to stop enumerating alternatives for an indexed OR. */ bool prepSubNodes(MatchExpression* node, PrepMemoContext context, std::vector* subnodesOut, std::vector* mandatorySubnodes); /** * Finds a set of predicates that can be safely compounded with the set * of predicates in 'assigned', under the assumption that we are assigning * predicates to a compound, multikey index. * * The list of candidate predicates that we could compound is passed * in 'couldCompound'. A subset of these predicates that is safe to * combine by compounding is returned in the out-parameter 'out'. * * Does not take ownership of its arguments. * * The rules for when to compound for multikey indices are reasonably * complex, and are dependent on the structure of $elemMatch's used * in the query. Ignoring $elemMatch for the time being, the rule is this: * * "Any set of predicates for which no two predicates share a path * prefix can be compounded." * * Suppose we have predicates over paths 'a.b' and 'a.c'. These cannot * be compounded because they share the prefix 'a'. Similarly, the bounds * for 'a' and 'a.b' cannot be compounded (in the case of multikey index * {a: 1, 'a.b': 1}). You *can* compound predicates over the paths 'a.b.c', * 'd', and 'e.b.c', because there is no shared prefix. * * The rules are different in the presence of $elemMatch. For $elemMatch * {a: {$elemMatch: {, ..., }}}, we are allowed to compound * bounds for pred1 through predN, even though these predicates share the * path prefix 'a'. However, we still cannot compound in the case of * {a: {$elemMatch: {'b.c': {$gt: 1}, 'b.d': 5}}} because 'b.c' and 'b.d' * share a prefix. In other words, what matters inside an $elemMatch is not * the absolute prefix, but rather the "relative prefix" after the shared * $elemMatch part of the path. * * A few more examples: * 1) {'a.b': {$elemMatch: {c: {$gt: 1}, d: 5}}}. In this case, we can * compound, because the $elemMatch is applied to the shared part of * the path 'a.b'. * * 2) {'a.b': 1, a: {$elemMatch: {b: {$gt: 0}}}}. We cannot combine the * bounds here because the prefix 'a' is shared by two predicates which * are not joined together by an $elemMatch. * * NOTE: * Usually 'assigned' has just one predicate. However, in order to support * mandatory predicate assignment (TEXT and GEO_NEAR), we allow multiple * already-assigned predicates to be passed. If a mandatory predicate is over * a trailing field in a multikey compound index, then we assign both a predicate * over the leading field as well as the mandatory predicate prior to calling * this function. * * Ex: * Say we have index {a: 1, b: 1, c: "2dsphere", d: 1} as well as a $near * predicate and a $within predicate over "c". The $near predicate is mandatory * and must be assigned. The $within predicate is not mandatory. Furthermore, * it cannot be assigned in addition to the $near predicate because the index * is multikey. * * In this case the enumerator must assign the $near predicate, and pass it in * in 'assigned'. Otherwise it would be possible to assign the $within predicate, * and then not assign the $near because the $within is already assigned (and * has the same path). */ void getMultikeyCompoundablePreds(const std::vector& assigned, const std::vector& couldCompound, std::vector* out); /** * Assigns predicates from 'couldAssign' to 'indexAssignment' that can safely be assigned * according to the intersecting and compounding rules for multikey indexes. The rules can * loosely be stated as follows: * * - It is always safe to assign a predicate on path Y to the index when no prefix of the path * Y causes the index to be multikey. * * - For any non-$elemMatch predicate on path X already assigned to the index, it isn't safe * to assign a predicate on path Y (possibly equal to X) to the index when a shared prefix * of the paths X and Y causes the index to be multikey. * * - For any $elemMatch predicate on path X already assigned to the index, it isn't safe to * assign a predicate on path Y (possibly equal to X) to the index when * (a) a shared prefix of the paths X and Y causes the index to be multikey and the * predicates aren't joined by the same $elemMatch context, or * (b) a shared prefix of the paths X and Y inside the innermost $elemMatch causes the * index to be multikey. * * If a predicate in 'couldAssign' is also in 'outsidePreds', then it is assumed to be an * outside predicate that was pushed down through an OR. Instead of adding it to * indexAssignment->preds, we add it to indexAssignment->orPushdowns. We create its * OrPushdownTag using the route specified in 'outsidePreds'. * * This function should only be called if the index has path-level multikey information. * Otherwise, getMultikeyCompoundablePreds() and compound() should be used instead. */ void assignMultikeySafePredicates( const std::vector& couldAssign, const stdx::unordered_map& outsidePreds, OneIndexAssignment* indexAssignment); /** * 'andAssignment' contains assignments that we've already committed to outputting, * including both single index assignments and ixisect assignments. * * 'ixisectAssigned' is a set of predicates that we are about to add to 'andAssignment' * as an index intersection assignment. * * Returns true if an single index assignment which is already in 'andAssignment' * contains a superset of the predicates in 'ixisectAssigned'. This means that we * can assign the same preds to a compound index rather than using index intersection. * * Ex. * Suppose we have indices {a: 1}, {b: 1}, and {a: 1, b: 1} with query * {a: 2, b: 2}. When we try to intersect {a: 1} and {b: 1} the predicates * a==2 and b==2 will get assigned to respective indices. But then we will * call this function with ixisectAssigned equal to the set {'a==2', 'b==2'}, * and notice that we have already assigned this same set of predicates to * the single index {a: 1, b: 1} via compounding. */ bool alreadyCompounded(const std::set& ixisectAssigned, const AndAssignment* andAssignment); struct CmpByIndexID { bool operator()(IndexID a, IndexID b) const { return a < b; } }; /** * Maps from index id to the list of predicates assigned to that index. */ typedef std::map, CmpByIndexID> IndexToPredMap; /** * Generate index intersection assignments given the predicate/index structure in idxToFirst * and idxToNotFirst (and the sub-trees in 'subnodes'). Outputs the assignments in * 'andAssignment'. */ void enumerateAndIntersect(const IndexToPredMap& idxToFirst, const IndexToPredMap& idxToNotFirst, const std::vector& subnodes, AndAssignment* andAssignment); /** * Generate one-index-at-once assignments given the predicate/index structure in idxToFirst * and idxToNotFirst (and the sub-trees in 'subnodes'). Outputs the assignments into * 'andAssignment'. The predicates in 'outsidePreds' are considered for OrPushdownTags. */ void enumerateOneIndex( IndexToPredMap idxToFirst, IndexToPredMap idxToNotFirst, const std::vector& subnodes, const stdx::unordered_map& outsidePreds, AndAssignment* andAssignment); /** * Generate single-index assignments for queries which contain mandatory * predicates (TEXT and GEO_NEAR, which are required to use a compatible index). * Outputs these assignments into 'andAssignment'. * * Returns true if it generated at least one assignment, and false if no assignment * of 'mandatoryPred' is possible. */ bool enumerateMandatoryIndex(const IndexToPredMap& idxToFirst, const IndexToPredMap& idxToNotFirst, MatchExpression* mandatoryPred, const std::set& mandatoryIndices, AndAssignment* andAssignment); /** * Assigns predicates in 'predsOverLeadingField' and 'idxToNotFirst' to 'indexAssign'. Assumes * that the index is not multikey. Also assumes that that the index is of a type used to answer * "mandatory predicates" such as text or geoNear. */ void assignToNonMultikeyMandatoryIndex( const IndexEntry& index, const std::vector& predsOverLeadingField, const IndexToPredMap& idxToNotFirst, OneIndexAssignment* indexAssign); /** * Try to assign predicates in 'tryCompound' to 'thisIndex' as compound assignments. * Output the assignments in 'assign'. */ void compound(const std::vector& tryCompound, const IndexEntry& thisIndex, OneIndexAssignment* assign); /** * Returns the position that 'predicate' can use in the key pattern for 'indexEntry'. It is * illegal to call this if 'predicate' does not have a RelevantTag, or it cannot use the index. */ size_t getPosition(const IndexEntry& indexEntry, MatchExpression* predicate); /** * Adds 'pred' to 'indexAssignment', using 'position' as its position in the index. If 'pred' is * in 'outsidePreds', then it is assumed to be an outside predicate that was pushed down through * an OR. Instead of adding it to indexAssignment->preds, we add it to * indexAssignment->orPushdowns. We create its OrPushdownTag using the route specified in * 'outsidePreds'. 'pred' must be able to use the index and be multikey-safe to add to * 'indexAssignment'. */ void assignPredicate( const stdx::unordered_map& outsidePreds, MatchExpression* pred, size_t position, OneIndexAssignment* indexAssignment); /** * Sets a flag on all outside pred routes that descend through an $elemMatch object node. */ void markTraversedThroughElemMatchObj(PrepMemoContext* context); /** * Return the memo entry for 'node'. Does some sanity checking to ensure that a memo entry * actually exists. */ MemoID memoIDForNode(MatchExpression* node); /** * Helper for advancing the enumeration for a LockstepOrAssignment node. */ bool _nextMemoForLockstepOrAssignment(LockstepOrAssignment*); std::string dumpMemo(); // Map from expression to its MemoID. stdx::unordered_map _nodeToId; // Map from MemoID to its precomputed solution info. stdx::unordered_map _memo; // If true, there are no further enumeration states, and getNext should return false. // We could be _done immediately after init if we're unable to output an indexed plan. bool _done; // // Data used by all enumeration strategies // // Match expression we're planning for. Not owned by us. MatchExpression* _root; // Indices we're allowed to enumerate with. Not owned here. const std::vector* _indices; // Do we output >1 index per AND (index intersection)? bool _ixisect; // Do we enumerate children of an $or in a special order to prioritize solutions which have the // same assignment on each branch? bool _enumerateOrChildrenLockstep; // How many enumerations are we willing to produce from each OR? size_t _orLimit; // How many things do we want from each AND? size_t _intersectLimit; // Whether we should disable OR-pushdown optimization. const bool _disableOrPushdown; }; } // namespace mongo