/**
* Copyright (C) 2013 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery
#include "mongo/platform/basic.h"
#include "mongo/db/query/planner_access.h"
#include
#include
#include "mongo/base/owned_pointer_vector.h"
#include "mongo/bson/simple_bsonobj_comparator.h"
#include "mongo/db/bson/dotted_path_support.h"
#include "mongo/db/matcher/expression_array.h"
#include "mongo/db/matcher/expression_geo.h"
#include "mongo/db/matcher/expression_text.h"
#include "mongo/db/query/index_bounds_builder.h"
#include "mongo/db/query/index_tag.h"
#include "mongo/db/query/indexability.h"
#include "mongo/db/query/query_knobs.h"
#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/query_planner_common.h"
#include "mongo/stdx/memory.h"
#include "mongo/util/log.h"
namespace {
using namespace mongo;
namespace dps = ::mongo::dotted_path_support;
/**
* Text node functors.
*/
bool isTextNode(const QuerySolutionNode* node) {
return STAGE_TEXT == node->getType();
}
/**
* Casts 'node' to a FetchNode* if it is a FetchNode, otherwise returns null.
*/
FetchNode* getFetchNode(QuerySolutionNode* node) {
if (STAGE_FETCH != node->getType()) {
return nullptr;
}
return static_cast(node);
}
/**
* If 'node' is an index scan node, casts it to IndexScanNode*. If 'node' is a FetchNode with an
* IndexScanNode child, then returns a pointer to the child index scan node. Otherwise returns
* null.
*/
const IndexScanNode* getIndexScanNode(const QuerySolutionNode* node) {
if (STAGE_IXSCAN == node->getType()) {
return static_cast(node);
} else if (STAGE_FETCH == node->getType()) {
invariant(1U == node->children.size());
const QuerySolutionNode* child = node->children[0];
if (STAGE_IXSCAN == child->getType()) {
return static_cast(child);
}
}
return nullptr;
}
/**
* Takes as input two query solution nodes returned by processIndexScans(). If both are
* IndexScanNode or FetchNode with an IndexScanNode child and the index scan nodes are identical
* (same bounds, same filter, same direction, etc.), then returns true. Otherwise returns false.
*/
bool scansAreEquivalent(const QuerySolutionNode* lhs, const QuerySolutionNode* rhs) {
const IndexScanNode* leftIxscan = getIndexScanNode(lhs);
const IndexScanNode* rightIxscan = getIndexScanNode(rhs);
if (!leftIxscan || !rightIxscan) {
return false;
}
return *leftIxscan == *rightIxscan;
}
} // namespace
namespace mongo {
using std::unique_ptr;
using std::vector;
using stdx::make_unique;
// static
QuerySolutionNode* QueryPlannerAccess::makeCollectionScan(const CanonicalQuery& query,
bool tailable,
const QueryPlannerParams& params) {
// Make the (only) node, a collection scan.
CollectionScanNode* csn = new CollectionScanNode();
csn->name = query.ns();
csn->filter = query.root()->shallowClone();
csn->tailable = tailable;
csn->maxScan = query.getQueryRequest().getMaxScan();
// If the hint is {$natural: +-1} this changes the direction of the collection scan.
if (!query.getQueryRequest().getHint().isEmpty()) {
BSONElement natural =
dps::extractElementAtPath(query.getQueryRequest().getHint(), "$natural");
if (!natural.eoo()) {
csn->direction = natural.numberInt() >= 0 ? 1 : -1;
}
}
// The sort can specify $natural as well. The sort direction should override the hint
// direction if both are specified.
const BSONObj& sortObj = query.getQueryRequest().getSort();
if (!sortObj.isEmpty()) {
BSONElement natural = dps::extractElementAtPath(sortObj, "$natural");
if (!natural.eoo()) {
csn->direction = natural.numberInt() >= 0 ? 1 : -1;
}
}
return csn;
}
// static
QuerySolutionNode* QueryPlannerAccess::makeLeafNode(
const CanonicalQuery& query,
const IndexEntry& index,
size_t pos,
MatchExpression* expr,
IndexBoundsBuilder::BoundsTightness* tightnessOut) {
// We're guaranteed that all GEO_NEARs are first. This slightly violates the "sort index
// predicates by their position in the compound index" rule but GEO_NEAR isn't an ixscan.
// This saves our bacon when we have {foo: 1, bar: "2dsphere"} and the predicate on bar is a
// $near. If we didn't get the GEO_NEAR first we'd create an IndexScanNode and later cast
// it to a GeoNear2DSphereNode
//
// This should gracefully deal with the case where we have a pred over foo but no geo clause
// over bar. In that case there is no GEO_NEAR to appear first and it's treated like a
// straight ixscan.
if (MatchExpression::GEO_NEAR == expr->matchType()) {
// We must not keep the expression node around.
*tightnessOut = IndexBoundsBuilder::EXACT;
GeoNearMatchExpression* nearExpr = static_cast(expr);
BSONElement elt = index.keyPattern.firstElement();
bool indexIs2D = (String == elt.type() && "2d" == elt.String());
if (indexIs2D) {
GeoNear2DNode* ret = new GeoNear2DNode(index);
ret->nq = &nearExpr->getData();
ret->baseBounds.fields.resize(index.keyPattern.nFields());
if (NULL != query.getProj()) {
ret->addPointMeta = query.getProj()->wantGeoNearPoint();
ret->addDistMeta = query.getProj()->wantGeoNearDistance();
}
return ret;
} else {
GeoNear2DSphereNode* ret = new GeoNear2DSphereNode(index);
ret->nq = &nearExpr->getData();
ret->baseBounds.fields.resize(index.keyPattern.nFields());
if (NULL != query.getProj()) {
ret->addPointMeta = query.getProj()->wantGeoNearPoint();
ret->addDistMeta = query.getProj()->wantGeoNearDistance();
}
return ret;
}
} else if (MatchExpression::TEXT == expr->matchType()) {
// We must not keep the expression node around.
*tightnessOut = IndexBoundsBuilder::EXACT;
TextMatchExpressionBase* textExpr = static_cast(expr);
TextNode* ret = new TextNode(index);
ret->ftsQuery = textExpr->getFTSQuery().clone();
return ret;
} else {
// Note that indexKeyPattern.firstElement().fieldName() may not equal expr->path()
// because expr might be inside an array operator that provides a path prefix.
IndexScanNode* isn = new IndexScanNode(index);
isn->bounds.fields.resize(index.keyPattern.nFields());
isn->maxScan = query.getQueryRequest().getMaxScan();
isn->addKeyMetadata = query.getQueryRequest().returnKey();
isn->queryCollator = query.getCollator();
// Get the ixtag->pos-th element of the index key pattern.
// TODO: cache this instead/with ixtag->pos?
BSONObjIterator it(index.keyPattern);
BSONElement keyElt = it.next();
for (size_t i = 0; i < pos; ++i) {
verify(it.more());
keyElt = it.next();
}
verify(!keyElt.eoo());
IndexBoundsBuilder::translate(expr, keyElt, index, &isn->bounds.fields[pos], tightnessOut);
return isn;
}
}
bool QueryPlannerAccess::shouldMergeWithLeaf(const MatchExpression* expr,
const ScanBuildingState& scanState) {
const QuerySolutionNode* node = scanState.currentScan.get();
if (NULL == node || NULL == expr) {
return false;
}
if (NULL == scanState.ixtag) {
return false;
}
if (scanState.currentIndexNumber != scanState.ixtag->index) {
return false;
}
size_t pos = scanState.ixtag->pos;
const IndexEntry& index = scanState.indices[scanState.currentIndexNumber];
const MatchExpression::MatchType mergeType = scanState.root->matchType();
const StageType type = node->getType();
const MatchExpression::MatchType exprType = expr->matchType();
//
// First handle special solution tree leaf types. In general, normal index bounds
// building is not used for special leaf types, and hence we cannot merge leaves.
//
// This rule is always true for OR, but there are exceptions for AND.
// Specifically, we can often merge a predicate with a special leaf type
// by adding a filter to the special leaf type.
//
if (STAGE_TEXT == type) {
// Currently only one text predicate is allowed, but to be safe, make sure that we
// do not try to merge two text predicates.
return MatchExpression::AND == mergeType && MatchExpression::TEXT != exprType;
}
if (STAGE_GEO_NEAR_2D == type || STAGE_GEO_NEAR_2DSPHERE == type) {
// Currently only one GEO_NEAR is allowed, but to be safe, make sure that we
// do not try to merge two GEO_NEAR predicates.
return MatchExpression::AND == mergeType && MatchExpression::GEO_NEAR != exprType;
}
//
// If we're here, then we're done checking for special leaf nodes, and the leaf
// must be a regular index scan.
//
invariant(type == STAGE_IXSCAN);
const IndexScanNode* scan = static_cast(node);
const IndexBounds* boundsToFillOut = &scan->bounds;
if (boundsToFillOut->fields[pos].name.empty()) {
// Bounds have yet to be assigned for the 'pos' position in the index. The plan enumerator
// should have told us that it is safe to compound bounds in this case.
invariant(scanState.ixtag->canCombineBounds);
return true;
} else {
// Bounds have already been assigned for the 'pos' position in the index.
if (MatchExpression::AND == mergeType) {
// The bounds on the 'pos' position in the index would be intersected if we merged these
// two leaf expressions.
if (!scanState.ixtag->canCombineBounds) {
// If the plan enumerator told us that it isn't safe to intersect bounds in this
// case, then it must be because we're using a multikey index.
invariant(index.multikey);
}
return scanState.ixtag->canCombineBounds;
} else {
// The bounds will be unionized.
return true;
}
}
}
void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingState* scanState) {
QuerySolutionNode* node = scanState->currentScan.get();
invariant(NULL != node);
const MatchExpression::MatchType mergeType = scanState->root->matchType();
size_t pos = scanState->ixtag->pos;
const IndexEntry& index = scanState->indices[scanState->currentIndexNumber];
const StageType type = node->getType();
// Text data is covered, but not exactly. Text covering is unlike any other covering
// so we deal with it in addFilterToSolutionNode.
if (STAGE_TEXT == type) {
scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED;
return;
}
IndexBounds* boundsToFillOut = NULL;
if (STAGE_GEO_NEAR_2D == type) {
invariant(INDEX_2D == index.type);
// 2D indexes are weird - the "2d" field stores a normally-indexed BinData field, but
// additional array fields are *not* exploded into multi-keys - they are stored directly
// as arrays in the index. Also, no matter what the index expression, the "2d" field is
// always first.
// This means that we can only generically accumulate bounds for 2D indexes over the
// first "2d" field (pos == 0) - MatchExpressions over other fields in the 2D index may
// be covered (can be evaluated using only the 2D index key). The additional fields
// must not affect the index scan bounds, since they are not stored in an
// IndexScan-compatible format.
if (pos > 0) {
// Marking this field as covered allows the planner to accumulate a MatchExpression
// over the returned 2D index keys instead of adding to the index bounds.
scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED;
return;
}
// We may have other $geoPredicates on a near index - generate bounds for these
GeoNear2DNode* gn = static_cast(node);
boundsToFillOut = &gn->baseBounds;
} else if (STAGE_GEO_NEAR_2DSPHERE == type) {
GeoNear2DSphereNode* gn = static_cast(node);
boundsToFillOut = &gn->baseBounds;
} else {
verify(type == STAGE_IXSCAN);
IndexScanNode* scan = static_cast(node);
// See STAGE_GEO_NEAR_2D above - 2D indexes can only accumulate scan bounds over the
// first "2d" field (pos == 0)
if (INDEX_2D == index.type && pos > 0) {
scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED;
return;
}
boundsToFillOut = &scan->bounds;
}
// Get the ixtag->pos-th element of the index key pattern.
// TODO: cache this instead/with ixtag->pos?
BSONObjIterator it(index.keyPattern);
BSONElement keyElt = it.next();
for (size_t i = 0; i < pos; ++i) {
verify(it.more());
keyElt = it.next();
}
verify(!keyElt.eoo());
scanState->tightness = IndexBoundsBuilder::INEXACT_FETCH;
verify(boundsToFillOut->fields.size() > pos);
OrderedIntervalList* oil = &boundsToFillOut->fields[pos];
if (boundsToFillOut->fields[pos].name.empty()) {
IndexBoundsBuilder::translate(expr, keyElt, index, oil, &scanState->tightness);
} else {
if (MatchExpression::AND == mergeType) {
IndexBoundsBuilder::translateAndIntersect(
expr, keyElt, index, oil, &scanState->tightness);
} else {
verify(MatchExpression::OR == mergeType);
IndexBoundsBuilder::translateAndUnion(expr, keyElt, index, oil, &scanState->tightness);
}
}
}
// static
void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntry& index) {
TextNode* tn = static_cast(node);
// Figure out what positions are prefix positions. We build an index key prefix from
// the predicates over the text index prefix keys.
// For example, say keyPattern = { a: 1, _fts: "text", _ftsx: 1, b: 1 }
// prefixEnd should be 1.
size_t prefixEnd = 0;
BSONObjIterator it(tn->index.keyPattern);
// Count how many prefix terms we have.
while (it.more()) {
// We know that the only key pattern with a type of String is the _fts field
// which is immediately after all prefix fields.
if (String == it.next().type()) {
break;
}
++prefixEnd;
}
// If there's no prefix, the filter is already on the node and the index prefix is null.
// We can just return.
if (!prefixEnd) {
return;
}
// We can't create a text stage if there aren't EQ predicates on its prefix terms. So
// if we've made it this far, we should have collected the prefix predicates in the
// filter.
invariant(NULL != tn->filter.get());
MatchExpression* textFilterMe = tn->filter.get();
BSONObjBuilder prefixBob;
if (MatchExpression::AND != textFilterMe->matchType()) {
// Only one prefix term.
invariant(1 == prefixEnd);
// Sanity check: must be an EQ.
invariant(MatchExpression::EQ == textFilterMe->matchType());
EqualityMatchExpression* eqExpr = static_cast(textFilterMe);
prefixBob.append(eqExpr->getData());
tn->filter.reset();
} else {
invariant(MatchExpression::AND == textFilterMe->matchType());
// Indexed by the keyPattern position index assignment. We want to add
// prefixes in order but we must order them first.
vector prefixExprs(prefixEnd, NULL);
AndMatchExpression* amExpr = static_cast(textFilterMe);
invariant(amExpr->numChildren() >= prefixEnd);
// Look through the AND children. The prefix children we want to
// stash in prefixExprs.
size_t curChild = 0;
while (curChild < amExpr->numChildren()) {
MatchExpression* child = amExpr->getChild(curChild);
IndexTag* ixtag = static_cast(child->getTag());
invariant(NULL != ixtag);
// Skip this child if it's not part of a prefix, or if we've already assigned a
// predicate to this prefix position.
if (ixtag->pos >= prefixEnd || prefixExprs[ixtag->pos] != NULL) {
++curChild;
continue;
}
// prefixExprs takes ownership of 'child'.
prefixExprs[ixtag->pos] = child;
amExpr->getChildVector()->erase(amExpr->getChildVector()->begin() + curChild);
// Don't increment curChild.
}
// Go through the prefix equalities in order and create an index prefix out of them.
for (size_t i = 0; i < prefixExprs.size(); ++i) {
MatchExpression* prefixMe = prefixExprs[i];
invariant(NULL != prefixMe);
invariant(MatchExpression::EQ == prefixMe->matchType());
EqualityMatchExpression* eqExpr = static_cast(prefixMe);
prefixBob.append(eqExpr->getData());
// We removed this from the AND expression that owned it, so we must clean it
// up ourselves.
delete prefixMe;
}
// Clear out an empty $and.
if (0 == amExpr->numChildren()) {
tn->filter.reset();
} else if (1 == amExpr->numChildren()) {
// Clear out unsightly only child of $and
MatchExpression* child = amExpr->getChild(0);
amExpr->getChildVector()->clear();
// Deletes current filter which is amExpr.
tn->filter.reset(child);
}
}
tn->indexPrefix = prefixBob.obj();
}
// static
bool QueryPlannerAccess::orNeedsFetch(const ScanBuildingState* scanState) {
if (scanState->loosestBounds == IndexBoundsBuilder::EXACT) {
return false;
} else if (scanState->loosestBounds == IndexBoundsBuilder::INEXACT_FETCH) {
return true;
} else {
invariant(scanState->loosestBounds == IndexBoundsBuilder::INEXACT_COVERED);
const IndexEntry& index = scanState->indices[scanState->currentIndexNumber];
return index.multikey;
}
}
// static
void QueryPlannerAccess::finishAndOutputLeaf(ScanBuildingState* scanState,
vector* out) {
finishLeafNode(scanState->currentScan.get(), scanState->indices[scanState->currentIndexNumber]);
if (MatchExpression::OR == scanState->root->matchType()) {
if (orNeedsFetch(scanState)) {
// In order to correctly evaluate the predicates for this index, we have to
// fetch the full documents. Add a fetch node above the index scan whose filter
// includes *all* of the predicates used to generate the ixscan.
FetchNode* fetch = new FetchNode();
// Takes ownership.
fetch->filter.reset(scanState->curOr.release());
// Takes ownership.
fetch->children.push_back(scanState->currentScan.release());
scanState->currentScan.reset(fetch);
} else if (scanState->loosestBounds == IndexBoundsBuilder::INEXACT_COVERED) {
// This an OR, at least one of the predicates used to generate 'currentScan'
// is inexact covered, but none is inexact fetch. This means that we can put
// these predicates, joined by an $or, as filters on the index scan. This avoids
// a fetch and allows the predicates to be covered by the index.
//
// Ex.
// Say we have index {a: 1} and query {$or: [{a: /foo/}, {a: /bar/}]}.
// The entire query, {$or: [{a: /foo/}, {a: /bar/}]}, should be a filter
// in the index scan stage itself.
scanState->currentScan->filter.reset(scanState->curOr.release());
}
}
out->push_back(scanState->currentScan.release());
}
// static
void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntry& index) {
const StageType type = node->getType();
if (STAGE_TEXT == type) {
finishTextNode(node, index);
return;
}
IndexBounds* bounds = NULL;
if (STAGE_GEO_NEAR_2D == type) {
GeoNear2DNode* gnode = static_cast(node);
bounds = &gnode->baseBounds;
} else if (STAGE_GEO_NEAR_2DSPHERE == type) {
GeoNear2DSphereNode* gnode = static_cast(node);
bounds = &gnode->baseBounds;
} else {
verify(type == STAGE_IXSCAN);
IndexScanNode* scan = static_cast(node);
bounds = &scan->bounds;
}
// Find the first field in the scan's bounds that was not filled out.
// TODO: could cache this.
size_t firstEmptyField = 0;
for (firstEmptyField = 0; firstEmptyField < bounds->fields.size(); ++firstEmptyField) {
if ("" == bounds->fields[firstEmptyField].name) {
verify(bounds->fields[firstEmptyField].intervals.empty());
break;
}
}
// All fields are filled out with bounds, nothing to do.
if (firstEmptyField == bounds->fields.size()) {
IndexBoundsBuilder::alignBounds(bounds, index.keyPattern);
return;
}
// Skip ahead to the firstEmptyField-th element, where we begin filling in bounds.
BSONObjIterator it(index.keyPattern);
for (size_t i = 0; i < firstEmptyField; ++i) {
verify(it.more());
it.next();
}
// For each field in the key...
while (it.more()) {
BSONElement kpElt = it.next();
// There may be filled-in fields to the right of the firstEmptyField.
// Example:
// The index {loc:"2dsphere", x:1}
// With a predicate over x and a near search over loc.
if ("" == bounds->fields[firstEmptyField].name) {
verify(bounds->fields[firstEmptyField].intervals.empty());
// ...build the "all values" interval.
IndexBoundsBuilder::allValuesForField(kpElt, &bounds->fields[firstEmptyField]);
}
++firstEmptyField;
}
// Make sure that the length of the key is the length of the bounds we started.
verify(firstEmptyField == bounds->fields.size());
// We create bounds assuming a forward direction but can easily reverse bounds to align
// according to our desired direction.
IndexBoundsBuilder::alignBounds(bounds, index.keyPattern);
}
// static
void QueryPlannerAccess::findElemMatchChildren(const MatchExpression* node,
vector* out,
vector* subnodesOut) {
for (size_t i = 0; i < node->numChildren(); ++i) {
MatchExpression* child = node->getChild(i);
if (Indexability::isBoundsGenerating(child) && NULL != child->getTag()) {
out->push_back(child);
} else if (MatchExpression::AND == child->matchType() ||
Indexability::arrayUsesIndexOnChildren(child)) {
findElemMatchChildren(child, out, subnodesOut);
} else if (NULL != child->getTag()) {
subnodesOut->push_back(child);
}
}
}
// static
std::vector QueryPlannerAccess::collapseEquivalentScans(
const std::vector scans) {
OwnedPointerVector ownedScans(scans);
invariant(ownedScans.size() > 0);
// Scans that need to be collapsed will be adjacent to each other in the list due to how we
// sort the query predicate. We step through the list, either merging the current scan into
// the last scan in 'collapsedScans', or adding a new entry to 'collapsedScans' if it can't
// be merged.
OwnedPointerVector collapsedScans;
collapsedScans.push_back(ownedScans.releaseAt(0));
for (size_t i = 1; i < ownedScans.size(); ++i) {
if (scansAreEquivalent(collapsedScans.back(), ownedScans[i])) {
// We collapse the entry from 'ownedScans' into the back of 'collapsedScans'.
std::unique_ptr collapseFrom(ownedScans.releaseAt(i));
FetchNode* collapseFromFetch = getFetchNode(collapseFrom.get());
FetchNode* collapseIntoFetch = getFetchNode(collapsedScans.back());
// If there's no filter associated with a fetch node on 'collapseFrom', all we have to
// do is clear the filter on the node that we are collapsing into.
if (!collapseFromFetch || !collapseFromFetch->filter.get()) {
if (collapseIntoFetch) {
collapseIntoFetch->filter.reset();
}
continue;
}
// If there's no filter associated with a fetch node on the back of the 'collapsedScans'
// list, then there's nothing more to do.
if (!collapseIntoFetch || !collapseIntoFetch->filter.get()) {
continue;
}
// Both the 'from' and 'into' nodes have filters. We join them with an
// OrMatchExpression.
std::unique_ptr collapsedFilter =
stdx::make_unique();
collapsedFilter->add(collapseFromFetch->filter.release());
collapsedFilter->add(collapseIntoFetch->filter.release());
// Normalize the filter and add it to 'into'.
collapseIntoFetch->filter.reset(
CanonicalQuery::normalizeTree(collapsedFilter.release()));
} else {
// Scans are not equivalent and can't be collapsed.
collapsedScans.push_back(ownedScans.releaseAt(i));
}
}
invariant(collapsedScans.size() > 0);
return collapsedScans.release();
}
// static
bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
MatchExpression* root,
bool inArrayOperator,
const std::vector& indices,
const QueryPlannerParams& params,
std::vector* out) {
// Initialize the ScanBuildingState.
ScanBuildingState scanState(root, inArrayOperator, indices);
while (scanState.curChild < root->numChildren()) {
MatchExpression* child = root->getChild(scanState.curChild);
// If there is no tag, it's not using an index. We've sorted our children such that the
// children with tags are first, so we stop now.
if (NULL == child->getTag()) {
break;
}
scanState.ixtag = static_cast(child->getTag());
// If there's a tag it must be valid.
verify(IndexTag::kNoIndex != scanState.ixtag->index);
// If the child can't use an index on its own field (and the child is not a negation
// of a bounds-generating expression), then it's indexed by virtue of one of
// its children having an index.
//
// NOTE: If the child is logical, it could possibly collapse into a single ixscan. we
// ignore this for now.
if (!Indexability::isBoundsGenerating(child)) {
// If we're here, then the child is indexed by virtue of its children.
// In most cases this means that we recursively build indexed data
// access on 'child'.
if (!processIndexScansSubnode(query, &scanState, params, out)) {
return false;
}
continue;
}
// If we're here, we now know that 'child' can use an index directly and the index is
// over the child's field.
// If 'child' is a NOT, then the tag we're interested in is on the NOT's
// child node.
if (MatchExpression::NOT == child->matchType()) {
scanState.ixtag = static_cast(child->getChild(0)->getTag());
invariant(IndexTag::kNoIndex != scanState.ixtag->index);
}
// If the child we're looking at uses a different index than the current index scan, add
// the current index scan to the output as we're done with it. The index scan created
// by the child then becomes our new current index scan. Note that the current scan
// could be NULL, in which case we don't output it. The rest of the logic is identical.
//
// If the child uses the same index as the current index scan, we may be able to merge
// the bounds for the two scans.
//
// Guiding principle: must the values we're testing come from the same array in the
// document? If so, we can combine bounds (via intersection or compounding). If not,
// we can't.
//
// If the index is NOT multikey, it's always semantically correct to combine bounds,
// as there are no arrays to worry about.
//
// If the index is multikey, there are arrays of values. There are several
// complications in the multikey case that have to be obeyed both by the enumerator
// and here as we try to merge predicates into query solution leaves. The hairy
// details of these rules are documented near the top of planner_access.h.
if (shouldMergeWithLeaf(child, scanState)) {
// The child uses the same index we're currently building a scan for. Merge
// the bounds and filters.
verify(scanState.currentIndexNumber == scanState.ixtag->index);
scanState.tightness = IndexBoundsBuilder::INEXACT_FETCH;
mergeWithLeafNode(child, &scanState);
handleFilter(&scanState);
} else {
if (NULL != scanState.currentScan.get()) {
// Output the current scan before starting to construct a new out.
finishAndOutputLeaf(&scanState, out);
} else {
verify(IndexTag::kNoIndex == scanState.currentIndexNumber);
}
// Reset state before producing a new leaf.
scanState.resetForNextScan(scanState.ixtag);
scanState.currentScan.reset(makeLeafNode(query,
indices[scanState.currentIndexNumber],
scanState.ixtag->pos,
child,
&scanState.tightness));
handleFilter(&scanState);
}
}
// Output the scan we're done with, if it exists.
if (NULL != scanState.currentScan.get()) {
finishAndOutputLeaf(&scanState, out);
}
return true;
}
// static
bool QueryPlannerAccess::processIndexScansElemMatch(const CanonicalQuery& query,
ScanBuildingState* scanState,
const QueryPlannerParams& params,
std::vector* out) {
MatchExpression* root = scanState->root;
MatchExpression* child = root->getChild(scanState->curChild);
const vector& indices = scanState->indices;
// We have an AND with an ELEM_MATCH_OBJECT child. The plan enumerator produces
// index taggings which indicate that we should try to compound with
// predicates retrieved from inside the subtree rooted at the ELEM_MATCH.
// In order to obey the enumerator's tagging, we need to retrieve these
// predicates from inside the $elemMatch, and try to merge them with
// the current index scan.
// Contains tagged predicates from inside the tree rooted at 'child'
// which are logically part of the AND.
vector emChildren;
// Contains tagged nodes that are not logically part of the AND and
// cannot use the index directly (e.g. OR nodes which are tagged to
// be indexed).
vector emSubnodes;
// Populate 'emChildren' and 'emSubnodes'.
findElemMatchChildren(child, &emChildren, &emSubnodes);
// Recursively build data access for the nodes inside 'emSubnodes'.
for (size_t i = 0; i < emSubnodes.size(); ++i) {
MatchExpression* subnode = emSubnodes[i];
if (!Indexability::isBoundsGenerating(subnode)) {
// Must pass true for 'inArrayOperator' because the subnode is
// beneath an ELEM_MATCH_OBJECT.
QuerySolutionNode* childSolution =
buildIndexedDataAccess(query, subnode, true, indices, params);
// buildIndexedDataAccess(...) returns NULL in error conditions, when
// it is unable to construct a query solution from a tagged match
// expression tree. If we are unable to construct a solution according
// to the instructions from the enumerator, then we bail out early
// (by returning false) rather than continuing on and potentially
// constructing an invalid solution tree.
if (NULL == childSolution) {
return false;
}
// Output the resulting solution tree.
out->push_back(childSolution);
}
}
// For each predicate in 'emChildren', try to merge it with the current index scan.
//
// This loop is similar to that in processIndexScans(...), except it does not call into
// handleFilters(...). Instead, we leave the entire $elemMatch filter intact. This way,
// the complete $elemMatch expression will be affixed as a filter later on.
for (size_t i = 0; i < emChildren.size(); ++i) {
MatchExpression* emChild = emChildren[i];
invariant(NULL != emChild->getTag());
scanState->ixtag = static_cast(emChild->getTag());
// If 'emChild' is a NOT, then the tag we're interested in is on the NOT's
// child node.
if (MatchExpression::NOT == emChild->matchType()) {
invariant(NULL != emChild->getChild(0)->getTag());
scanState->ixtag = static_cast(emChild->getChild(0)->getTag());
invariant(IndexTag::kNoIndex != scanState->ixtag->index);
}
if (shouldMergeWithLeaf(emChild, *scanState)) {
// The child uses the same index we're currently building a scan for. Merge
// the bounds and filters.
verify(scanState->currentIndexNumber == scanState->ixtag->index);
scanState->tightness = IndexBoundsBuilder::INEXACT_FETCH;
mergeWithLeafNode(emChild, scanState);
} else {
if (NULL != scanState->currentScan.get()) {
finishAndOutputLeaf(scanState, out);
} else {
verify(IndexTag::kNoIndex == scanState->currentIndexNumber);
}
scanState->currentIndexNumber = scanState->ixtag->index;
scanState->tightness = IndexBoundsBuilder::INEXACT_FETCH;
scanState->currentScan.reset(makeLeafNode(query,
indices[scanState->currentIndexNumber],
scanState->ixtag->pos,
emChild,
&scanState->tightness));
}
}
// We're done processing the $elemMatch child. We leave it hanging off
// it's AND parent so that it will be affixed as a filter later on,
// and move on to the next child of the AND.
++scanState->curChild;
return true;
}
// static
bool QueryPlannerAccess::processIndexScansSubnode(const CanonicalQuery& query,
ScanBuildingState* scanState,
const QueryPlannerParams& params,
std::vector* out) {
MatchExpression* root = scanState->root;
MatchExpression* child = root->getChild(scanState->curChild);
const vector& indices = scanState->indices;
bool inArrayOperator = scanState->inArrayOperator;
if (MatchExpression::AND == root->matchType() &&
MatchExpression::ELEM_MATCH_OBJECT == child->matchType()) {
return processIndexScansElemMatch(query, scanState, params, out);
} else if (!inArrayOperator) {
// The logical sub-tree is responsible for fully evaluating itself. Any
// required filters or fetches are already hung on it. As such, we remove the
// filter branch from our tree. buildIndexedDataAccess takes ownership of the
// child.
root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild);
// The curChild of today is the curChild+1 of yesterday.
} else {
++scanState->curChild;
}
// If inArrayOperator: takes ownership of child, which is OK, since we detached
// child from root.
QuerySolutionNode* childSolution =
buildIndexedDataAccess(query, child, inArrayOperator, indices, params);
if (NULL == childSolution) {
return false;
}
out->push_back(childSolution);
return true;
}
// static
QuerySolutionNode* QueryPlannerAccess::buildIndexedAnd(const CanonicalQuery& query,
MatchExpression* root,
bool inArrayOperator,
const vector& indices,
const QueryPlannerParams& params) {
unique_ptr autoRoot;
if (!inArrayOperator) {
autoRoot.reset(root);
}
// If we are not allowed to trim for ixisect, then clone the match expression before
// passing it to processIndexScans(), which may do the trimming. If we end up with
// an index intersection solution, then we use our copy of the match expression to be
// sure that the FETCH stage will recheck the entire predicate.
//
// XXX: This block is a hack to accommodate the storage layer concurrency model.
std::unique_ptr clonedRoot;
if (params.options & QueryPlannerParams::CANNOT_TRIM_IXISECT) {
clonedRoot = root->shallowClone();
}
vector ixscanNodes;
if (!processIndexScans(query, root, inArrayOperator, indices, params, &ixscanNodes)) {
return NULL;
}
//
// Process all non-indexed predicates. We hang these above the AND with a fetch and
// filter.
//
// This is the node we're about to return.
QuerySolutionNode* andResult;
// We must use an index for at least one child of the AND. We shouldn't be here if this
// isn't the case.
verify(ixscanNodes.size() >= 1);
// Short-circuit: an AND of one child is just the child.
if (ixscanNodes.size() == 1) {
andResult = ixscanNodes[0];
} else {
// Figure out if we want AndHashNode or AndSortedNode.
bool allSortedByDiskLoc = true;
for (size_t i = 0; i < ixscanNodes.size(); ++i) {
if (!ixscanNodes[i]->sortedByDiskLoc()) {
allSortedByDiskLoc = false;
break;
}
}
if (allSortedByDiskLoc) {
AndSortedNode* asn = new AndSortedNode();
asn->children.swap(ixscanNodes);
andResult = asn;
} else if (internalQueryPlannerEnableHashIntersection) {
AndHashNode* ahn = new AndHashNode();
ahn->children.swap(ixscanNodes);
andResult = ahn;
// The AndHashNode provides the sort order of its last child. If any of the
// possible subnodes of AndHashNode provides the sort order we care about, we put
// that one last.
for (size_t i = 0; i < ahn->children.size(); ++i) {
ahn->children[i]->computeProperties();
const BSONObjSet& sorts = ahn->children[i]->getSort();
if (sorts.end() != sorts.find(query.getQueryRequest().getSort())) {
std::swap(ahn->children[i], ahn->children.back());
break;
}
}
} else {
// We can't use sort-based intersection, and hash-based intersection is disabled.
// Clean up the index scans and bail out by returning NULL.
LOG(5) << "Can't build index intersection solution: "
<< "AND_SORTED is not possible and AND_HASH is disabled.";
for (size_t i = 0; i < ixscanNodes.size(); i++) {
delete ixscanNodes[i];
}
return NULL;
}
}
// Don't bother doing any kind of fetch analysis lite if we're doing it anyway above us.
if (inArrayOperator) {
return andResult;
}
// XXX: This block is a hack to accommodate the storage layer concurrency model.
if ((params.options & QueryPlannerParams::CANNOT_TRIM_IXISECT) &&
(andResult->getType() == STAGE_AND_HASH || andResult->getType() == STAGE_AND_SORTED)) {
// We got an index intersection solution, and we aren't allowed to answer predicates
// using the index. We add a fetch with the entire filter.
invariant(clonedRoot.get());
FetchNode* fetch = new FetchNode();
fetch->filter.reset(clonedRoot.release());
// Takes ownership of 'andResult'.
fetch->children.push_back(andResult);
return fetch;
}
// If there are any nodes still attached to the AND, we can't answer them using the
// index, so we put a fetch with filter.
if (root->numChildren() > 0) {
FetchNode* fetch = new FetchNode();
verify(NULL != autoRoot.get());
if (autoRoot->numChildren() == 1) {
// An $and of one thing is that thing.
MatchExpression* child = autoRoot->getChild(0);
autoRoot->getChildVector()->clear();
// Takes ownership.
fetch->filter.reset(child);
// 'autoRoot' will delete the empty $and.
} else { // root->numChildren() > 1
// Takes ownership.
fetch->filter.reset(autoRoot.release());
}
// takes ownership
fetch->children.push_back(andResult);
andResult = fetch;
} else {
// root has no children, let autoRoot get rid of it when it goes out of scope.
}
return andResult;
}
// static
QuerySolutionNode* QueryPlannerAccess::buildIndexedOr(const CanonicalQuery& query,
MatchExpression* root,
bool inArrayOperator,
const vector& indices,
const QueryPlannerParams& params) {
unique_ptr autoRoot;
if (!inArrayOperator) {
autoRoot.reset(root);
}
vector ixscanNodes;
if (!processIndexScans(query, root, inArrayOperator, indices, params, &ixscanNodes)) {
return NULL;
}
// Unlike an AND, an OR cannot have filters hanging off of it. We stop processing
// when any of our children lack index tags. If a node lacks an index tag it cannot
// be answered via an index.
if (!inArrayOperator && 0 != root->numChildren()) {
warning() << "planner OR error, non-indexed child of OR.";
// We won't enumerate an OR without indices for each child, so this isn't an issue, even
// if we have an AND with an OR child -- we won't get here unless the OR is fully
// indexed.
return NULL;
}
// If all index scans are identical, then we collapse them into a single scan. This prevents
// us from creating OR plans where the branches of the OR perform duplicate work.
ixscanNodes = collapseEquivalentScans(ixscanNodes);
QuerySolutionNode* orResult = NULL;
// An OR of one node is just that node.
if (1 == ixscanNodes.size()) {
orResult = ixscanNodes[0];
} else {
bool shouldMergeSort = false;
if (!query.getQueryRequest().getSort().isEmpty()) {
const BSONObj& desiredSort = query.getQueryRequest().getSort();
// If there exists a sort order that is present in each child, we can merge them and
// maintain that sort order / those sort orders.
ixscanNodes[0]->computeProperties();
BSONObjSet sharedSortOrders = ixscanNodes[0]->getSort();
if (!sharedSortOrders.empty()) {
for (size_t i = 1; i < ixscanNodes.size(); ++i) {
ixscanNodes[i]->computeProperties();
const auto& bsonCmp = SimpleBSONObjComparator::kInstance;
BSONObjSet isect = bsonCmp.makeBSONObjSet();
set_intersection(sharedSortOrders.begin(),
sharedSortOrders.end(),
ixscanNodes[i]->getSort().begin(),
ixscanNodes[i]->getSort().end(),
std::inserter(isect, isect.end()),
bsonCmp.makeLessThan());
sharedSortOrders = isect;
if (sharedSortOrders.empty()) {
break;
}
}
}
// TODO: If we're looking for the reverse of one of these sort orders we could
// possibly reverse the ixscan nodes.
shouldMergeSort = (sharedSortOrders.end() != sharedSortOrders.find(desiredSort));
}
if (shouldMergeSort) {
MergeSortNode* msn = new MergeSortNode();
msn->sort = query.getQueryRequest().getSort();
msn->children.swap(ixscanNodes);
orResult = msn;
} else {
OrNode* orn = new OrNode();
orn->children.swap(ixscanNodes);
orResult = orn;
}
}
// Evaluate text nodes first to ensure that text scores are available.
// Move text nodes to front of vector.
std::stable_partition(orResult->children.begin(), orResult->children.end(), isTextNode);
// OR must have an index for each child, so we should have detached all children from
// 'root', and there's nothing useful to do with an empty or MatchExpression. We let it die
// via autoRoot.
return orResult;
}
// static
QuerySolutionNode* QueryPlannerAccess::buildIndexedDataAccess(const CanonicalQuery& query,
MatchExpression* root,
bool inArrayOperator,
const vector& indices,
const QueryPlannerParams& params) {
if (root->isLogical() && !Indexability::isBoundsGeneratingNot(root)) {
if (MatchExpression::AND == root->matchType()) {
// Takes ownership of root.
return buildIndexedAnd(query, root, inArrayOperator, indices, params);
} else if (MatchExpression::OR == root->matchType()) {
// Takes ownership of root.
return buildIndexedOr(query, root, inArrayOperator, indices, params);
} else {
// Can't do anything with negated logical nodes index-wise.
if (!inArrayOperator) {
delete root;
}
return NULL;
}
} else {
unique_ptr autoRoot;
if (!inArrayOperator) {
autoRoot.reset(root);
}
// isArray or isLeaf is true. Either way, it's over one field, and the bounds builder
// deals with it.
if (NULL == root->getTag()) {
// No index to use here, not in the context of logical operator, so we're SOL.
return NULL;
} else if (Indexability::isBoundsGenerating(root)) {
// Make an index scan over the tagged index #.
IndexTag* tag = static_cast(root->getTag());
IndexBoundsBuilder::BoundsTightness tightness = IndexBoundsBuilder::EXACT;
QuerySolutionNode* soln =
makeLeafNode(query, indices[tag->index], tag->pos, root, &tightness);
verify(NULL != soln);
finishLeafNode(soln, indices[tag->index]);
if (inArrayOperator) {
return soln;
}
// If the bounds are exact, the set of documents that satisfy the predicate is
// exactly equal to the set of documents that the scan provides.
//
// If the bounds are not exact, the set of documents returned from the scan is a
// superset of documents that satisfy the predicate, and we must check the
// predicate.
if (tightness == IndexBoundsBuilder::EXACT) {
return soln;
} else if (tightness == IndexBoundsBuilder::INEXACT_COVERED &&
!indices[tag->index].multikey) {
verify(NULL == soln->filter.get());
soln->filter.reset(autoRoot.release());
return soln;
} else {
FetchNode* fetch = new FetchNode();
verify(NULL != autoRoot.get());
fetch->filter.reset(autoRoot.release());
fetch->children.push_back(soln);
return fetch;
}
} else if (Indexability::arrayUsesIndexOnChildren(root)) {
QuerySolutionNode* solution = NULL;
invariant(MatchExpression::ELEM_MATCH_OBJECT);
// The child is an AND.
invariant(1 == root->numChildren());
solution = buildIndexedDataAccess(query, root->getChild(0), true, indices, params);
if (NULL == solution) {
return NULL;
}
// There may be an array operator above us.
if (inArrayOperator) {
return solution;
}
FetchNode* fetch = new FetchNode();
// Takes ownership of 'root'.
verify(NULL != autoRoot.get());
fetch->filter.reset(autoRoot.release());
fetch->children.push_back(solution);
return fetch;
}
}
if (!inArrayOperator) {
delete root;
}
return NULL;
}
QuerySolutionNode* QueryPlannerAccess::scanWholeIndex(const IndexEntry& index,
const CanonicalQuery& query,
const QueryPlannerParams& params,
int direction) {
QuerySolutionNode* solnRoot = NULL;
// Build an ixscan over the id index, use it, and return it.
unique_ptr isn = make_unique(index);
isn->maxScan = query.getQueryRequest().getMaxScan();
isn->addKeyMetadata = query.getQueryRequest().returnKey();
isn->queryCollator = query.getCollator();
IndexBoundsBuilder::allValuesBounds(index.keyPattern, &isn->bounds);
if (-1 == direction) {
QueryPlannerCommon::reverseScans(isn.get());
isn->direction = -1;
}
unique_ptr filter = query.root()->shallowClone();
// If it's find({}) remove the no-op root.
if (MatchExpression::AND == filter->matchType() && (0 == filter->numChildren())) {
solnRoot = isn.release();
} else {
// TODO: We may not need to do the fetch if the predicates in root are covered. But
// for now it's safe (though *maybe* slower).
unique_ptr fetch = make_unique();
fetch->filter = std::move(filter);
fetch->children.push_back(isn.release());
solnRoot = fetch.release();
}
return solnRoot;
}
// static
void QueryPlannerAccess::addFilterToSolutionNode(QuerySolutionNode* node,
MatchExpression* match,
MatchExpression::MatchType type) {
if (NULL == node->filter) {
node->filter.reset(match);
} else if (type == node->filter->matchType()) {
// The 'node' already has either an AND or OR filter that matches 'type'. Add 'match' as
// another branch of the filter.
ListOfMatchExpression* listFilter = static_cast(node->filter.get());
listFilter->add(match);
} else {
// The 'node' already has a filter that does not match 'type'. If 'type' is AND, then
// combine 'match' with the existing filter by adding an AND. If 'type' is OR, combine
// by adding an OR node.
unique_ptr listFilter;
if (MatchExpression::AND == type) {
listFilter = make_unique();
} else {
verify(MatchExpression::OR == type);
listFilter = make_unique();
}
unique_ptr oldFilter = node->filter->shallowClone();
listFilter->add(oldFilter.release());
listFilter->add(match);
node->filter = std::move(listFilter);
}
}
// static
void QueryPlannerAccess::handleFilter(ScanBuildingState* scanState) {
if (MatchExpression::OR == scanState->root->matchType()) {
handleFilterOr(scanState);
} else if (MatchExpression::AND == scanState->root->matchType()) {
handleFilterAnd(scanState);
} else {
// We must be building leaves for either and AND or an OR.
invariant(0);
}
}
// static
void QueryPlannerAccess::handleFilterOr(ScanBuildingState* scanState) {
MatchExpression* root = scanState->root;
MatchExpression* child = root->getChild(scanState->curChild);
if (scanState->inArrayOperator) {
// We're inside an array operator. The entire array operator expression
// should always be affixed as a filter. We keep 'curChild' in the $and
// for affixing later.
++scanState->curChild;
} else {
if (scanState->tightness < scanState->loosestBounds) {
scanState->loosestBounds = scanState->tightness;
}
// Detach 'child' and add it to 'curOr'.
root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild);
scanState->curOr->getChildVector()->push_back(child);
}
}
// static
void QueryPlannerAccess::handleFilterAnd(ScanBuildingState* scanState) {
MatchExpression* root = scanState->root;
MatchExpression* child = root->getChild(scanState->curChild);
const IndexEntry& index = scanState->indices[scanState->currentIndexNumber];
if (scanState->inArrayOperator) {
// We're inside an array operator. The entire array operator expression
// should always be affixed as a filter. We keep 'curChild' in the $and
// for affixing later.
++scanState->curChild;
} else if (scanState->tightness == IndexBoundsBuilder::EXACT) {
root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild);
delete child;
} else if (scanState->tightness == IndexBoundsBuilder::INEXACT_COVERED &&
(INDEX_TEXT == index.type || !index.multikey)) {
// The bounds are not exact, but the information needed to
// evaluate the predicate is in the index key. Remove the
// MatchExpression from its parent and attach it to the filter
// of the index scan we're building.
//
// We can only use this optimization if the index is NOT multikey.
// Suppose that we had the multikey index {x: 1} and a document
// {x: ["a", "b"]}. Now if we query for {x: /b/} the filter might
// ever only be applied to the index key "a". We'd incorrectly
// conclude that the document does not match the query :( so we
// gotta stick to non-multikey indices.
root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild);
addFilterToSolutionNode(scanState->currentScan.get(), child, root->matchType());
} else {
// We keep curChild in the AND for affixing later.
++scanState->curChild;
}
}
QuerySolutionNode* QueryPlannerAccess::makeIndexScan(const IndexEntry& index,
const CanonicalQuery& query,
const QueryPlannerParams& params,
const BSONObj& startKey,
const BSONObj& endKey) {
QuerySolutionNode* solnRoot = NULL;
// Build an ixscan over the id index, use it, and return it.
IndexScanNode* isn = new IndexScanNode(index);
isn->direction = 1;
isn->maxScan = query.getQueryRequest().getMaxScan();
isn->addKeyMetadata = query.getQueryRequest().returnKey();
isn->bounds.isSimpleRange = true;
isn->bounds.startKey = startKey;
isn->bounds.endKey = endKey;
isn->bounds.boundInclusion = BoundInclusion::kIncludeStartKeyOnly;
isn->queryCollator = query.getCollator();
unique_ptr filter = query.root()->shallowClone();
// If it's find({}) remove the no-op root.
if (MatchExpression::AND == filter->matchType() && (0 == filter->numChildren())) {
solnRoot = isn;
} else {
// TODO: We may not need to do the fetch if the predicates in root are covered. But
// for now it's safe (though *maybe* slower).
unique_ptr fetch = make_unique();
fetch->filter = std::move(filter);
fetch->children.push_back(isn);
solnRoot = fetch.release();
}
return solnRoot;
}
} // namespace mongo