/**
* Copyright (C) 2013 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects
* for all of the code used other than as permitted herein. If you modify
* file(s) with this exception, you may extend this exception to your
* version of the file(s), but you are not obligated to do so. If you do not
* wish to do so, delete this exception statement from your version. If you
* delete this exception statement from all source files in the program,
* then also delete it in the license file.
*/
#pragma once
#include
#include "mongo/base/disallow_copying.h"
#include "mongo/base/status.h"
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/query/index_entry.h"
#include "mongo/db/query/index_tag.h"
#include "mongo/db/query/query_knobs.h"
namespace mongo {
struct PlanEnumeratorParams {
PlanEnumeratorParams() : intersect(false),
maxSolutionsPerOr(internalQueryEnumerationMaxOrSolutions),
maxIntersectPerAnd(internalQueryEnumerationMaxIntersectPerAnd) { }
// Do we provide solutions that use more indices than the minimum required to provide
// an indexed solution?
bool intersect;
// Not owned here.
MatchExpression* root;
// Not owned here.
const vector* indices;
// How many plans are we willing to ouput from an OR? We currently consider
// all possibly OR plans, which means the product of the number of possibilities
// for each clause of the OR. This could grow disastrously large.
size_t maxSolutionsPerOr;
// How many intersect plans are we willing to output from an AND? Given that we pursue an
// all-pairs approach, we could wind up creating a lot of enumeration possibilities for
// certain inputs.
size_t maxIntersectPerAnd;
};
/**
* Provides elements from the power set of possible indices to use. Uses the available
* predicate information to make better decisions about what indices are best.
*/
class PlanEnumerator {
MONGO_DISALLOW_COPYING(PlanEnumerator);
public:
/**
* Constructs an enumerator for the query specified in 'root' which is tagged with
* RelevantTag(s). The index patterns mentioned in the tags are described by 'indices'.
*
* Does not take ownership of any arguments. They must outlive any calls to getNext(...).
*/
PlanEnumerator(const PlanEnumeratorParams& params);
~PlanEnumerator();
/**
* Returns OK and performs a sanity check on the input parameters and prepares the
* internal state so that getNext() can be called. Returns an error status with a
* description if the sanity check failed.
*/
Status init();
/**
* Outputs a possible plan. Leaves in the plan are tagged with an index to use.
* Returns true if a plan was outputted, false if no more plans will be outputted.
*
* 'tree' is set to point to the query tree. A QueryAssignment is built from this tree.
* Caller owns the pointer. Note that 'tree' itself points into data owned by the
* provided CanonicalQuery.
*
* Nodes in 'tree' are tagged with indices that should be used to answer the tagged nodes.
* Only nodes that have a field name (isLogical() == false) will be tagged.
*/
bool getNext(MatchExpression** tree);
private:
//
// Memoization strategy
//
// Everything is really a size_t but it's far more readable to impose a type via typedef.
// An ID we use to index into _memo. An entry in _memo is a NodeAssignment.
typedef size_t MemoID;
// An index in _indices.
typedef size_t IndexID;
// The position of a field in a possibly compound index.
typedef size_t IndexPosition;
struct PrepMemoContext {
PrepMemoContext() : elemMatchExpr(NULL) { }
MatchExpression* elemMatchExpr;
};
/**
* Traverses the match expression and generates the memo structure from it.
* Returns true if the provided node uses an index, false otherwise.
*/
bool prepMemo(MatchExpression* node, PrepMemoContext context);
/**
* Traverses the memo structure and annotates the tree with IndexTags for the chosen
* indices.
*/
void tagMemo(MemoID id);
/**
* Move to the next enumeration state. Each assignment stores its own enumeration state.
* See the various ____Assignment classes below for details on enumeration state.
*
* Returns true if the memo subtree with root 'node' has no further enumeration states. In
* this case, that subtree restarts its enumeration at the beginning state. This implies
* that the parent of node should move to the next state. If 'node' is the root of the
* tree, we are done with enumeration.
*
* The return of this function can be thought of like a 'carry' in addition.
*
* Returns false if the memo subtree has moved to the next state.
*/
bool nextMemo(MemoID id);
/**
* A short word on the memo structure.
*
* The PlanEnumerator is interested in matching predicates and indices. Predicates
* are leaf nodes in the parse tree. {x:5}, {x: {$geoWithin:...}} are both predicates.
*
* When we have simple predicates, like {x:5}, the task is easy: any indices prefixed
* with 'x' can be used to answer the predicate. This is where the PredicateAssignment
* is used.
*
* With logical operators, things are more complicated. Let's start with OR, the simplest.
* Since the output of an OR is the union of its results, each of its children must be
* indexed for the entire OR to be indexed. If each subtree of an OR is indexable, the
* OR is as well.
*
* For an AND to be indexed, only one of its children must be indexed. AND is an
* intersection of its children, so each of its children describes a superset of the
* produced results.
*/
struct PredicateAssignment {
PredicateAssignment() : indexToAssign(0) { }
vector first;
// Not owned here.
MatchExpression* expr;
// Enumeration state. An indexed predicate's possible states are the indices that the
// predicate can directly use (the 'first' indices). As such this value ranges from 0
// to first.size()-1 inclusive.
size_t indexToAssign;
};
struct OrAssignment {
OrAssignment() : counter(0) { }
// Each child of an OR must be indexed for the OR to be indexed. When an OR moves to a
// subsequent state it just asks all its children to move their states forward.
// Must use all of subnodes.
vector subnodes;
// The number of OR states that we've enumerated so far.
size_t counter;
};
// This is used by AndAssignment and is not an actual assignment.
struct OneIndexAssignment {
// 'preds[i]' is uses index 'index' at position 'positions[i]'
vector preds;
vector positions;
IndexID index;
};
struct AndEnumerableState {
vector assignments;
vector subnodesToIndex;
};
struct AndAssignment {
AndAssignment() : counter(0) { }
vector choices;
// We're on the counter-th member of state.
size_t counter;
};
struct ArrayAssignment {
ArrayAssignment() : counter(0) { }
vector subnodes;
size_t counter;
};
/**
* Associates indices with predicates.
*/
struct NodeAssignment {
scoped_ptr pred;
scoped_ptr orAssignment;
scoped_ptr andAssignment;
scoped_ptr arrayAssignment;
string toString() const;
};
/**
* Allocates a NodeAssignment and associates it with the provided 'expr'.
*
* The unique MemoID of the new assignment is outputted in '*id'.
* The out parameter '*slot' points to the newly allocated NodeAssignment.
*/
void allocateAssignment(MatchExpression* expr, NodeAssignment** slot, MemoID* id);
/**
* Predicates inside $elemMatch's that are semantically "$and of $and"
* predicates are not rewritten to the top-level during normalization.
* However, we would like to make predicates inside $elemMatch available
* for combining index bounds with the top-level $and predicates.
*
* This function deeply traverses $and and $elemMatch expressions of
* the tree rooted at 'node', adding all preds that can use an index
* to the output vector 'indexOut'. At the same time, $elemMatch
* context information is stashed in the tags so that we don't lose
* information due to flattening.
*
* Nodes that cannot be deeply traversed are returned via the output
* vectors 'subnodesOut' and 'mandatorySubnodes'. Subnodes are "mandatory"
* if they *must* use an index (TEXT and GEO).
*
* Does not take ownership of arguments.
*
* Returns false if the AND cannot be indexed. Otherwise returns true.
*/
bool partitionPreds(MatchExpression* node,
PrepMemoContext context,
vector* indexOut,
vector* subnodesOut,
vector* mandatorySubnodes);
/**
* Finds a set of predicates that can be safely compounded with the set
* of predicates in 'assigned', under the assumption that we are assigning
* predicates to a compound, multikey index.
*
* The list of candidate predicates that we could compound is passed
* in 'couldCompound'. A subset of these predicates that is safe to
* combine by compounding is returned in the out-parameter 'out'.
*
* Does not take ownership of its arguments.
*
* The rules for when to compound for multikey indices are reasonably
* complex, and are dependent on the structure of $elemMatch's used
* in the query. Ignoring $elemMatch for the time being, the rule is this:
*
* "Any set of predicates for which no two predicates share a path
* prefix can be compounded."
*
* Suppose we have predicates over paths 'a.b' and 'a.c'. These cannot
* be compounded because they share the prefix 'a'. Similarly, the bounds
* for 'a' and 'a.b' cannot be compounded (in the case of multikey index
* {a: 1, 'a.b': 1}). You *can* compound predicates over the paths 'a.b.c',
* 'd', and 'e.b.c', because there is no shared prefix.
*
* The rules are different in the presence of $elemMatch. For $elemMatch
* {a: {$elemMatch: {, ..., }}}, we are allowed to compound
* bounds for pred1 through predN, even though these predicates share the
* path prefix 'a'. However, we still cannot compound in the case of
* {a: {$elemMatch: {'b.c': {$gt: 1}, 'b.d': 5}}} because 'b.c' and 'b.d'
* share a prefix. In other words, what matters inside an $elemMatch is not
* the absolute prefix, but rather the "relative prefix" after the shared
* $elemMatch part of the path.
*
* A few more examples:
* 1) {'a.b': {$elemMatch: {c: {$gt: 1}, d: 5}}}. In this case, we can
* compound, because the $elemMatch is applied to the shared part of
* the path 'a.b'.
*
* 2) {'a.b': 1, a: {$elemMatch: {b: {$gt: 0}}}}. We cannot combine the
* bounds here because the prefix 'a' is shared by two predicates which
* are not joined together by an $elemMatch.
*
* NOTE:
* Usually 'assigned' has just one predicate. However, in order to support
* mandatory predicate assignment (TEXT and GEO_NEAR), we allow multiple
* already-assigned predicates to be passed. If a mandatory predicate is over
* a trailing field in a multikey compound index, then we assign both a predicate
* over the leading field as well as the mandatory predicate prior to calling
* this function.
*
* Ex:
* Say we have index {a: 1, b: 1, c: "2dsphere", d: 1} as well as a $near
* predicate and a $within predicate over "c". The $near predicate is mandatory
* and must be assigned. The $within predicate is not mandatory. Furthermore,
* it cannot be assigned in addition to the $near predicate because the index
* is multikey.
*
* In this case the enumerator must assign the $near predicate, and pass it in
* in 'assigned'. Otherwise it would be possible to assign the $within predicate,
* and then not assign the $near because the $within is already assigned (and
* has the same path).
*/
void getMultikeyCompoundablePreds(const vector& assigned,
const vector& couldCompound,
vector* out);
/**
* 'andAssignment' contains assignments that we've already committed to outputting,
* including both single index assignments and ixisect assignments.
*
* 'ixisectAssigned' is a set of predicates that we are about to add to 'andAssignment'
* as an index intersection assignment.
*
* Returns true if an single index assignment which is already in 'andAssignment'
* contains a superset of the predicates in 'ixisectAssigned'. This means that we
* can assign the same preds to a compound index rather than using index intersection.
*
* Ex.
* Suppose we have indices {a: 1}, {b: 1}, and {a: 1, b: 1} with query
* {a: 2, b: 2}. When we try to intersect {a: 1} and {b: 1} the predicates
* a==2 and b==2 will get assigned to respective indices. But then we will
* call this function with ixisectAssigned equal to the set {'a==2', 'b==2'},
* and notice that we have already assigned this same set of predicates to
* the single index {a: 1, b: 1} via compounding.
*/
bool alreadyCompounded(const set& ixisectAssigned,
const AndAssignment* andAssignment);
/**
* Output index intersection assignments inside of an AND node.
*/
typedef unordered_map > IndexToPredMap;
/**
* Generate index intersection assignments given the predicate/index structure in idxToFirst
* and idxToNotFirst (and the sub-trees in 'subnodes'). Outputs the assignments in
* 'andAssignment'.
*/
void enumerateAndIntersect(const IndexToPredMap& idxToFirst,
const IndexToPredMap& idxToNotFirst,
const vector& subnodes,
AndAssignment* andAssignment);
/**
* Generate one-index-at-once assignments given the predicate/index structure in idxToFirst
* and idxToNotFirst (and the sub-trees in 'subnodes'). Outputs the assignments into
* 'andAssignment'.
*/
void enumerateOneIndex(const IndexToPredMap& idxToFirst,
const IndexToPredMap& idxToNotFirst,
const vector& subnodes,
AndAssignment* andAssignment);
/**
* Generate single-index assignments for queries which contain mandatory
* predicates (TEXT and GEO_NEAR, which are required to use a compatible index).
* Outputs these assignments into 'andAssignment'.
*/
void enumerateMandatoryIndex(const IndexToPredMap& idxToFirst,
const IndexToPredMap& idxToNotFirst,
MatchExpression* mandatoryPred,
const set& mandatoryIndices,
AndAssignment* andAssignment);
/**
* Try to assign predicates in 'tryCompound' to 'thisIndex' as compound assignments.
* Output the assignments in 'assign'.
*/
void compound(const vector& tryCompound,
const IndexEntry& thisIndex,
OneIndexAssignment* assign);
/**
* Return the memo entry for 'node'. Does some sanity checking to ensure that a memo entry
* actually exists.
*/
MemoID memoIDForNode(MatchExpression* node);
std::string dumpMemo();
// Map from expression to its MemoID.
unordered_map _nodeToId;
// Map from MemoID to its precomputed solution info.
unordered_map _memo;
// If true, there are no further enumeration states, and getNext should return false.
// We could be _done immediately after init if we're unable to output an indexed plan.
bool _done;
//
// Data used by all enumeration strategies
//
// Match expression we're planning for. Not owned by us.
MatchExpression* _root;
// Indices we're allowed to enumerate with. Not owned here.
const vector* _indices;
// Do we output >1 index per AND (index intersection)?
bool _ixisect;
// How many enumerations are we willing to produce from each OR?
size_t _orLimit;
// How many things do we want from each AND?
size_t _intersectLimit;
};
} // namespace mongo