/**
* Copyright (C) 2013 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/query/index_bounds_builder.h"
#include "mongo/db/query/query_planner_params.h"
#include "mongo/db/query/query_solution.h"
namespace mongo {
/**
* MULTIKEY INDEX BOUNDS RULES
*
* 1. In general for a multikey index, we cannot intersect bounds
* even if the index is not compound.
* Example:
* Let's say we have the document {a: [5, 7]}.
* This document satisfies the query {$and: [ {a: 5}, {a: 7} ] }
* For the index {a:1} we have the keys {"": 5} and {"": 7}.
* Each child of the AND is tagged with the index {a: 1}
* The interval for the {a: 5} branch is [5, 5]. It is exact.
* The interval for the {a: 7} branch is [7, 7]. It is exact.
* The intersection of the intervals is {}.
* If we scan over {}, the intersection of the intervals, we will retrieve nothing.
*
* 2. In general for a multikey compound index, we *can* compound the bounds.
* For example, if we have multikey index {a: 1, b: 1} and query {a: 2, b: 3},
* we can use the bounds {a: [[2, 2]], b: [[3, 3]]}.
*
* 3. Despite rule #2, if fields in the compound index share a prefix, then it
* is not safe to compound the bounds. We can only specify bounds for the first
* field.
* Example:
* Let's say we have the document {a: [ {b: 3}, {c: 4} ] }
* This document satisfies the query {'a.b': 3, 'a.c': 4}.
* For the index {'a.b': 1, 'a.c': 1} we have the keys {"": 3, "": null} and
* {"": null, "": 4}.
* Let's use the aforementioned index to answer the query.
* The bounds for 'a.b' are [3,3], and the bounds for 'a.c' are [4,4].
* If we combine the bounds, we would only look at keys {"": 3, "":4 }.
* Therefore we wouldn't look at the document's keys in the index.
* Therefore we don't combine bounds.
*
* 4. There is an exception to rule #1, and that is when we're evaluating
* an $elemMatch.
* Example:
* Let's say that we have the same document from (1), {a: [5, 7]}.
* This document satisfies {a: {$lte: 5, $gte: 7}}, but it does not
* satisfy {a: {$elemMatch: {$lte: 5, $gte: 7}}}. The $elemMatch indicates
* that we are allowed to intersect the bounds, which means that we will
* scan over the empty interval {} and retrieve nothing. This is the
* expected result because there is no entry in the array "a" that
* simultaneously satisfies the predicates a<=5 and a>=7.
*
* 5. There is also an exception to rule #3, and that is when we're evaluating
* an $elemMatch. The bounds can be compounded for predicates that share a prefix
* so long as the shared prefix is the path for which there is an $elemMatch.
* Example:
* Suppose we have the same document from (3), {a: [{b: 3}, {c: 4}]}. As discussed
* above, we cannot compound the index bounds for query {'a.b': 1, 'a.c': 1}.
* However, for the query {a: {$elemMatch: {b: 1, c: 1}} we can compound the
* bounds because the $elemMatch is applied to the shared prefix "a".
*/
/**
* Methods for creating a QuerySolutionNode tree that accesses the data required by the query.
*/
class QueryPlannerAccess {
public:
/**
* Building the leaves (i.e. the index scans) is done by looping through
* predicates one at a time. During the process, there is a fair amount of state
* information to keep track of, which we consolidate into this data structure.
*/
struct ScanBuildingState {
ScanBuildingState(MatchExpression* theRoot,
bool inArrayOp,
const std::vector& indexList)
: root(theRoot),
inArrayOperator(inArrayOp),
indices(indexList),
currentScan(NULL),
curChild(0),
currentIndexNumber(IndexTag::kNoIndex),
ixtag(NULL),
tightness(IndexBoundsBuilder::INEXACT_FETCH),
curOr(NULL),
loosestBounds(IndexBoundsBuilder::EXACT) {
}
/**
* Reset the scan building state in preparation for building a new scan.
*
* This always should be called prior to allocating a new 'currentScan'.
*/
void resetForNextScan(IndexTag* newTag) {
currentScan.reset(NULL);
currentIndexNumber = newTag->index;
tightness = IndexBoundsBuilder::INEXACT_FETCH;
loosestBounds = IndexBoundsBuilder::EXACT;
if (MatchExpression::OR == root->matchType()) {
curOr.reset(new OrMatchExpression());
}
}
// The root of the MatchExpression tree for which we are currently building index
// scans. Should be either an AND node or an OR node.
MatchExpression* root;
// Are we inside an array operator such as $elemMatch or $all?
bool inArrayOperator;
// A list of relevant indices which 'root' may be tagged to use.
const std::vector& indices;
// The index access node that we are currently constructing. We may merge
// multiple tagged predicates into a single index scan.
std::auto_ptr currentScan;
// An index into the child vector of 'root'. Indicates the child MatchExpression
// for which we are currently either constructing a new scan or which we are about
// to merge with 'currentScan'.
size_t curChild;
// An index into the 'indices', so that 'indices[currentIndexNumber]' gives the
// index used by 'currentScan'. If there is no currentScan, this should be set
// to 'IndexTag::kNoIndex'.
size_t currentIndexNumber;
// The tag on 'curChild'.
IndexTag* ixtag;
// Whether the bounds for predicate 'curChild' are exact, inexact and covered by
// the index, or inexact with a fetch required.
IndexBoundsBuilder::BoundsTightness tightness;
// If 'root' is an $or, the child predicates which are tagged with the same index are
// detached from the original root and added here. 'curOr' may be attached as a filter
// later on, or ignored and cleaned up by the auto_ptr.
std::auto_ptr curOr;
// The values of BoundsTightness range from loosest to tightest in this order:
//
// INEXACT_FETCH < INEXACT_COVERED < EXACT
//
// 'loosestBounds' stores the smallest of these three values encountered so far for
// the current scan. If at least one of the child predicates assigned to the current
// index is INEXACT_FETCH, then 'loosestBounds' is INEXACT_FETCH. If at least one of
// the child predicates assigned to the current index is INEXACT_COVERED but none are
// INEXACT_FETCH, then 'loosestBounds' is INEXACT_COVERED.
IndexBoundsBuilder::BoundsTightness loosestBounds;
private:
// Default constructor is not allowed.
ScanBuildingState();
};
/**
* Return a CollectionScanNode that scans as requested in 'query'.
*/
static QuerySolutionNode* makeCollectionScan(const CanonicalQuery& query,
bool tailable,
const QueryPlannerParams& params);
/**
* Return a plan that uses the provided index as a proxy for a collection scan.
*/
static QuerySolutionNode* scanWholeIndex(const IndexEntry& index,
const CanonicalQuery& query,
const QueryPlannerParams& params,
int direction = 1);
/**
* Return a plan that scans the provided index from [startKey to endKey).
*/
static QuerySolutionNode* makeIndexScan(const IndexEntry& index,
const CanonicalQuery& query,
const QueryPlannerParams& params,
const BSONObj& startKey,
const BSONObj& endKey);
//
// Indexed Data Access methods.
//
// The inArrayOperator flag deserves some attention. It is set when we're processing a
// child of an MatchExpression::ELEM_MATCH_OBJECT.
//
// When true, the following behavior changes for all methods below that take it as an argument:
// 0. No deletion of MatchExpression(s). In fact,
// 1. No mutation of the MatchExpression at all. We need the tree as-is in order to perform
// a filter on the entire tree.
// 2. No fetches performed. There will be a final fetch by the caller of buildIndexedDataAccess
// who set the value of inArrayOperator to true.
// 3. No compound indices are used and no bounds are combined. These are incorrect in the context
// of these operators.
//
/**
* If 'inArrayOperator' is false, takes ownership of 'root'.
*/
static QuerySolutionNode* buildIndexedDataAccess(const CanonicalQuery& query,
MatchExpression* root,
bool inArrayOperator,
const std::vector& indices,
const QueryPlannerParams& params);
/**
* Takes ownership of 'root'.
*/
static QuerySolutionNode* buildIndexedAnd(const CanonicalQuery& query,
MatchExpression* root,
bool inArrayOperator,
const std::vector& indices,
const QueryPlannerParams& params);
/**
* Takes ownership of 'root'.
*/
static QuerySolutionNode* buildIndexedOr(const CanonicalQuery& query,
MatchExpression* root,
bool inArrayOperator,
const std::vector& indices,
const QueryPlannerParams& params);
/**
* Traverses the tree rooted at the $elemMatch expression 'node',
* finding all predicates that can use an index directly and returning
* them in the out-parameter vector 'out'.
*
* Traverses only through AND and ELEM_MATCH_OBJECT nodes.
*
* Other nodes (i.e. nodes which cannot use an index directly, and which are
* neither AND nor ELEM_MATCH_OBJECT) are returned in 'subnodesOut' if they are
* tagged to use an index.
*/
static void findElemMatchChildren(const MatchExpression* node,
std::vector* out,
std::vector* subnodesOut);
/**
* Helper used by buildIndexedAnd and buildIndexedOr.
*
* The children of AND and OR nodes are sorted by the index that the subtree rooted at
* that node uses. Child nodes that use the same index are adjacent to one another to
* facilitate grouping of index scans. As such, the processing for AND and OR is
* almost identical.
*
* See tagForSort and sortUsingTags in index_tag.h for details on ordering the children
* of OR and AND.
*
* Does not take ownership of 'root' but may remove children from it.
*/
static bool processIndexScans(const CanonicalQuery& query,
MatchExpression* root,
bool inArrayOperator,
const std::vector& indices,
const QueryPlannerParams& params,
std::vector* out);
/**
* Used by processIndexScans(...) in order to recursively build a data access
* plan for a "subnode", a node in the MatchExpression tree which is indexed by
* virtue of its children.
*
* The resulting scans are outputted in the out-parameter 'out'.
*/
static bool processIndexScansSubnode(const CanonicalQuery& query,
ScanBuildingState* scanState,
const QueryPlannerParams& params,
std::vector* out);
/**
* Used by processIndexScansSubnode(...) to build the leaves of the solution tree for an
* ELEM_MATCH_OBJECT node beneath an AND.
*
* The resulting scans are outputted in the out-parameter 'out'.
*/
static bool processIndexScansElemMatch(const CanonicalQuery& query,
ScanBuildingState* scanState,
const QueryPlannerParams& params,
std::vector* out);
//
// Helpers for creating an index scan.
//
/**
* Create a new data access node.
*
* If the node is an index scan, the bounds for 'expr' are computed and placed into the
* first field's OIL position. The rest of the OILs are allocated but uninitialized.
*
* If the node is a geo node, grab the geo data from 'expr' and stuff it into the
* geo solution node of the appropriate type.
*/
static QuerySolutionNode* makeLeafNode(const CanonicalQuery& query,
const IndexEntry& index,
size_t pos,
MatchExpression* expr,
IndexBoundsBuilder::BoundsTightness* tightnessOut);
/**
* Merge the predicate 'expr' with the leaf node 'node'.
*/
static void mergeWithLeafNode(MatchExpression* expr, ScanBuildingState* scanState);
/**
* Determines whether it is safe to merge the expression 'expr' with
* the leaf node of the query solution contained in 'scanState'.
*
* Does not take ownership of its arguments.
*/
static bool shouldMergeWithLeaf(const MatchExpression* expr,
const ScanBuildingState& scanState);
/**
* If index scan (regular or expression index), fill in any bounds that are missing in
* 'node' with the "all values for this field" interval.
*
* If geo, do nothing.
* If text, punt to finishTextNode.
*/
static void finishLeafNode(QuerySolutionNode* node, const IndexEntry& index);
/**
* Fills in any missing bounds by calling finishLeafNode(...) for the scan contained in
* 'scanState'. The resulting scan is outputted in the out-parameter 'out', transferring
* ownership in the process.
*
* If 'scanState' is building an index scan for OR-related predicates, filters
* may be affixed to the scan as necessary.
*/
static void finishAndOutputLeaf(ScanBuildingState* scanState,
std::vector* out);
/**
* Returns true if the current scan in 'scanState' requires a FetchNode.
*/
static bool orNeedsFetch(const ScanBuildingState* scanState);
static void finishTextNode(QuerySolutionNode* node, const IndexEntry& index);
/**
* Add the filter 'match' to the query solution node 'node'. Takes
* ownership of 'match'.
*
* The MatchType, 'type', indicates whether 'match' is a child of an
* AND or an OR match expression.
*/
static void addFilterToSolutionNode(QuerySolutionNode* node, MatchExpression* match,
MatchExpression::MatchType type);
/**
* Once a predicate is merged into the current scan, there are a few things we might
* want to do with the filter:
* 1) Detach the filter from its parent and delete it because the predicate is
* answered by exact index bounds.
* 2) Leave the filter alone so that it can be affixed as part of a fetch node later.
* 3) Detach the filter from its parent and attach it directly to an index scan node.
* We can sometimes due this for INEXACT_COVERED predicates which are not answered exactly
* by the bounds, but can be answered by examing the data in the index key.
* 4) Detach the filter from its parent and attach it as a child of a separate
* MatchExpression tree. This is done for proper handling of inexact bounds for $or
* queries.
*
* This executes one of the four options above, according to the data in 'scanState'.
*/
static void handleFilter(ScanBuildingState* scanState);
/**
* Implements handleFilter(...) for OR queries.
*/
static void handleFilterAnd(ScanBuildingState* scanState);
/**
* Implements handleFilter(...) for AND queries.
*/
static void handleFilterOr(ScanBuildingState* scanState);
};
} // namespace mongo