/**
 * Copyright (C) 2013 10gen Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License, version 3,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * As a special exception, the copyright holders give permission to link the
 * code of portions of this program with the OpenSSL library under certain
 * conditions as described in each individual source file and distribute
 * linked combinations including the program with the OpenSSL library. You
 * must comply with the GNU Affero General Public License in all respects for
 * all of the code used other than as permitted herein. If you modify file(s)
 * with this exception, you may extend this exception to your version of the
 * file(s), but you are not obligated to do so. If you do not wish to do so,
 * delete this exception statement from your version. If you delete this
 * exception statement from all source files in the program, then also delete
 * it in the license file.
*/ #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery #include "mongo/platform/basic.h" #include "mongo/db/query/planner_access.h" #include #include #include #include "mongo/base/owned_pointer_vector.h" #include "mongo/bson/simple_bsonobj_comparator.h" #include "mongo/db/bson/dotted_path_support.h" #include "mongo/db/matcher/expression_array.h" #include "mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/expression_text.h" #include "mongo/db/query/index_bounds_builder.h" #include "mongo/db/query/index_tag.h" #include "mongo/db/query/indexability.h" #include "mongo/db/query/planner_wildcard_helpers.h" #include "mongo/db/query/query_knobs.h" #include "mongo/db/query/query_planner.h" #include "mongo/db/query/query_planner_common.h" #include "mongo/stdx/memory.h" #include "mongo/util/log.h" #include "mongo/util/transitional_tools_do_not_use/vector_spooling.h" namespace { using namespace mongo; namespace wcp = ::mongo::wildcard_planning; namespace dps = ::mongo::dotted_path_support; /** * Text node functors. */ bool isTextNode(const QuerySolutionNode* node) { return STAGE_TEXT == node->getType(); } /** * Casts 'node' to a FetchNode* if it is a FetchNode, otherwise returns null. */ FetchNode* getFetchNode(QuerySolutionNode* node) { if (STAGE_FETCH != node->getType()) { return nullptr; } return static_cast(node); } /** * If 'node' is an index scan node, casts it to IndexScanNode*. If 'node' is a FetchNode with an * IndexScanNode child, then returns a pointer to the child index scan node. Otherwise returns * null. 
*/ const IndexScanNode* getIndexScanNode(const QuerySolutionNode* node) { if (STAGE_IXSCAN == node->getType()) { return static_cast(node); } else if (STAGE_FETCH == node->getType()) { invariant(1U == node->children.size()); const QuerySolutionNode* child = node->children[0]; if (STAGE_IXSCAN == child->getType()) { return static_cast(child); } } return nullptr; } /** * Takes as input two query solution nodes returned by processIndexScans(). If both are * IndexScanNode or FetchNode with an IndexScanNode child and the index scan nodes are identical * (same bounds, same filter, same direction, etc.), then returns true. Otherwise returns false. */ bool scansAreEquivalent(const QuerySolutionNode* lhs, const QuerySolutionNode* rhs) { const IndexScanNode* leftIxscan = getIndexScanNode(lhs); const IndexScanNode* rightIxscan = getIndexScanNode(rhs); if (!leftIxscan || !rightIxscan) { return false; } return *leftIxscan == *rightIxscan; } /** * If all nodes can provide the requested sort, returns a vector expressing which nodes must have * their index scans reversed to provide the sort. Otherwise, returns an empty vector. * 'nodes' must not be empty. */ std::vector canProvideSortWithMergeSort( const std::vector>& nodes, const BSONObj& requestedSort) { invariant(!nodes.empty()); std::vector shouldReverseScan; const auto reverseSort = QueryPlannerCommon::reverseSortObj(requestedSort); for (auto&& node : nodes) { node->computeProperties(); auto sorts = node->getSort(); if (sorts.find(requestedSort) != sorts.end()) { shouldReverseScan.push_back(false); } else if (sorts.find(reverseSort) != sorts.end()) { shouldReverseScan.push_back(true); } else { return {}; } } return shouldReverseScan; } } // namespace namespace mongo { using std::unique_ptr; using std::vector; using stdx::make_unique; std::unique_ptr QueryPlannerAccess::makeCollectionScan( const CanonicalQuery& query, bool tailable, const QueryPlannerParams& params) { // Make the (only) node, a collection scan. 
auto csn = stdx::make_unique(); csn->name = query.ns(); csn->filter = query.root()->shallowClone(); csn->tailable = tailable; csn->shouldTrackLatestOplogTimestamp = params.options & QueryPlannerParams::TRACK_LATEST_OPLOG_TS; csn->shouldWaitForOplogVisibility = params.options & QueryPlannerParams::OPLOG_SCAN_WAIT_FOR_VISIBLE; // If the hint is {$natural: +-1} this changes the direction of the collection scan. if (!query.getQueryRequest().getHint().isEmpty()) { BSONElement natural = dps::extractElementAtPath(query.getQueryRequest().getHint(), "$natural"); if (!natural.eoo()) { csn->direction = natural.numberInt() >= 0 ? 1 : -1; } } // The sort can specify $natural as well. The sort direction should override the hint // direction if both are specified. const BSONObj& sortObj = query.getQueryRequest().getSort(); if (!sortObj.isEmpty()) { BSONElement natural = dps::extractElementAtPath(sortObj, "$natural"); if (!natural.eoo()) { csn->direction = natural.numberInt() >= 0 ? 1 : -1; } } return std::move(csn); } std::unique_ptr QueryPlannerAccess::makeLeafNode( const CanonicalQuery& query, const IndexEntry& index, size_t pos, const MatchExpression* expr, IndexBoundsBuilder::BoundsTightness* tightnessOut) { // We're guaranteed that all GEO_NEARs are first. This slightly violates the "sort index // predicates by their position in the compound index" rule but GEO_NEAR isn't an ixscan. // This saves our bacon when we have {foo: 1, bar: "2dsphere"} and the predicate on bar is a // $near. If we didn't get the GEO_NEAR first we'd create an IndexScanNode and later cast // it to a GeoNear2DSphereNode // // This should gracefully deal with the case where we have a pred over foo but no geo clause // over bar. In that case there is no GEO_NEAR to appear first and it's treated like a // straight ixscan. if (MatchExpression::GEO_NEAR == expr->matchType()) { // We must not keep the expression node around. 
*tightnessOut = IndexBoundsBuilder::EXACT; auto nearExpr = static_cast(expr); BSONElement elt = index.keyPattern.firstElement(); bool indexIs2D = (String == elt.type() && "2d" == elt.String()); if (indexIs2D) { auto ret = stdx::make_unique(index); ret->nq = &nearExpr->getData(); ret->baseBounds.fields.resize(index.keyPattern.nFields()); if (NULL != query.getProj()) { ret->addPointMeta = query.getProj()->wantGeoNearPoint(); ret->addDistMeta = query.getProj()->wantGeoNearDistance(); } return std::move(ret); } else { auto ret = stdx::make_unique(index); ret->nq = &nearExpr->getData(); ret->baseBounds.fields.resize(index.keyPattern.nFields()); if (NULL != query.getProj()) { ret->addPointMeta = query.getProj()->wantGeoNearPoint(); ret->addDistMeta = query.getProj()->wantGeoNearDistance(); } return std::move(ret); } } else if (MatchExpression::TEXT == expr->matchType()) { // We must not keep the expression node around. *tightnessOut = IndexBoundsBuilder::EXACT; auto textExpr = static_cast(expr); auto ret = stdx::make_unique(index); ret->ftsQuery = textExpr->getFTSQuery().clone(); // Count the number of prefix fields before the "text" field. for (auto&& keyPatternElt : ret->index.keyPattern) { // We know that the only key pattern with a type of String is the _fts field // which is immediately after all prefix fields. if (BSONType::String == keyPatternElt.type()) { break; } ++(ret->numPrefixFields); } return std::move(ret); } else { // Note that indexKeyPattern.firstElement().fieldName() may not equal expr->path() // because expr might be inside an array operator that provides a path prefix. auto isn = stdx::make_unique(index); isn->bounds.fields.resize(index.keyPattern.nFields()); isn->addKeyMetadata = query.getQueryRequest().returnKey(); isn->queryCollator = query.getCollator(); // Get the ixtag->pos-th element of the index key pattern. // TODO: cache this instead/with ixtag->pos? 
BSONObjIterator it(index.keyPattern); BSONElement keyElt = it.next(); for (size_t i = 0; i < pos; ++i) { verify(it.more()); keyElt = it.next(); } verify(!keyElt.eoo()); IndexBoundsBuilder::translate(expr, keyElt, index, &isn->bounds.fields[pos], tightnessOut); return std::move(isn); } } bool QueryPlannerAccess::shouldMergeWithLeaf(const MatchExpression* expr, const ScanBuildingState& scanState) { const QuerySolutionNode* node = scanState.currentScan.get(); if (NULL == node || NULL == expr) { return false; } if (NULL == scanState.ixtag) { return false; } if (scanState.currentIndexNumber != scanState.ixtag->index) { return false; } size_t pos = scanState.ixtag->pos; const IndexEntry& index = scanState.indices[scanState.currentIndexNumber]; const MatchExpression::MatchType mergeType = scanState.root->matchType(); const StageType type = node->getType(); const MatchExpression::MatchType exprType = expr->matchType(); // // First handle special solution tree leaf types. In general, normal index bounds // building is not used for special leaf types, and hence we cannot merge leaves. // // This rule is always true for OR, but there are exceptions for AND. // Specifically, we can often merge a predicate with a special leaf type // by adding a filter to the special leaf type. // if (STAGE_TEXT == type) { // Currently only one text predicate is allowed, but to be safe, make sure that we // do not try to merge two text predicates. return MatchExpression::AND == mergeType && MatchExpression::TEXT != exprType; } if (STAGE_GEO_NEAR_2D == type || STAGE_GEO_NEAR_2DSPHERE == type) { // Currently only one GEO_NEAR is allowed, but to be safe, make sure that we // do not try to merge two GEO_NEAR predicates. return MatchExpression::AND == mergeType && MatchExpression::GEO_NEAR != exprType; } // // If we're here, then we're done checking for special leaf nodes, and the leaf // must be a regular index scan. 
// invariant(type == STAGE_IXSCAN); const IndexScanNode* scan = static_cast(node); const IndexBounds* boundsToFillOut = &scan->bounds; if (boundsToFillOut->fields[pos].name.empty()) { // Bounds have yet to be assigned for the 'pos' position in the index. The plan enumerator // should have told us that it is safe to compound bounds in this case. invariant(scanState.ixtag->canCombineBounds); return true; } else { // Bounds have already been assigned for the 'pos' position in the index. if (MatchExpression::AND == mergeType) { // The bounds on the 'pos' position in the index would be intersected if we merged these // two leaf expressions. if (!scanState.ixtag->canCombineBounds) { // If the plan enumerator told us that it isn't safe to intersect bounds in this // case, then it must be because we're using a multikey index. invariant(index.multikey); } return scanState.ixtag->canCombineBounds; } else { // The bounds will be unionized. return true; } } } void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingState* scanState) { QuerySolutionNode* node = scanState->currentScan.get(); invariant(NULL != node); const MatchExpression::MatchType mergeType = scanState->root->matchType(); size_t pos = scanState->ixtag->pos; const IndexEntry& index = scanState->indices[scanState->currentIndexNumber]; const StageType type = node->getType(); if (STAGE_TEXT == type) { auto textNode = static_cast(node); if (pos < textNode->numPrefixFields) { // This predicate is assigned to one of the prefix fields of the text index. Such // predicates must always be equalities and must always be attached to the TEXT node. In // order to ensure this happens, we assign INEXACT_COVERED tightness. scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED; } else { // The predicate is assigned to one of the trailing fields of the text index. 
We // currently don't generate bounds for predicates assigned to trailing fields of a text // index, but rather attempt to attach a covered filter. However, certain predicates can // never be correctly covered (e.g. $exists), so we assign the tightness accordingly. scanState->tightness = IndexBoundsBuilder::canUseCoveredMatching(expr, index) ? IndexBoundsBuilder::INEXACT_COVERED : IndexBoundsBuilder::INEXACT_FETCH; } return; } IndexBounds* boundsToFillOut = NULL; if (STAGE_GEO_NEAR_2D == type) { invariant(INDEX_2D == index.type); // 2D indexes have a special format - the "2d" field stores a normally-indexed BinData // field, but additional array fields are *not* exploded into multi-keys - they are stored // directly as arrays in the index. Also, no matter what the index expression, the "2d" // field is always first. // // This means that we can only generically accumulate bounds for 2D indexes over the first // "2d" field (pos == 0) - MatchExpressions over other fields in the 2D index may be covered // (can be evaluated using only the 2D index key). The additional fields must not affect // the index scan bounds, since they are not stored in an IndexScan-compatible format. if (pos > 0) { // The predicate is over a trailing field of the "2d" index. If possible, we assign it // as a covered filter (the INEXACT_COVERED case). Otherwise, the filter must be // evaluated after fetching the full documents. scanState->tightness = IndexBoundsBuilder::canUseCoveredMatching(expr, index) ? 
IndexBoundsBuilder::INEXACT_COVERED : IndexBoundsBuilder::INEXACT_FETCH; return; } // We may have other $geoPredicates on a near index - generate bounds for these GeoNear2DNode* gn = static_cast(node); boundsToFillOut = &gn->baseBounds; } else if (STAGE_GEO_NEAR_2DSPHERE == type) { GeoNear2DSphereNode* gn = static_cast(node); boundsToFillOut = &gn->baseBounds; } else { verify(type == STAGE_IXSCAN); IndexScanNode* scan = static_cast(node); // See STAGE_GEO_NEAR_2D above - 2D indexes can only accumulate scan bounds over the first // "2d" field (pos == 0). if (INDEX_2D == index.type && pos > 0) { // The predicate is over a trailing field of the "2d" index. If possible, we assign it // as a covered filter (the INEXACT_COVERED case). Otherwise, the filter must be // evaluated after fetching the full documents. scanState->tightness = IndexBoundsBuilder::canUseCoveredMatching(expr, index) ? IndexBoundsBuilder::INEXACT_COVERED : IndexBoundsBuilder::INEXACT_FETCH; return; } boundsToFillOut = &scan->bounds; } // Get the ixtag->pos-th element of the index key pattern. // TODO: cache this instead/with ixtag->pos? 
BSONObjIterator it(index.keyPattern); BSONElement keyElt = it.next(); for (size_t i = 0; i < pos; ++i) { verify(it.more()); keyElt = it.next(); } verify(!keyElt.eoo()); scanState->tightness = IndexBoundsBuilder::INEXACT_FETCH; verify(boundsToFillOut->fields.size() > pos); OrderedIntervalList* oil = &boundsToFillOut->fields[pos]; if (boundsToFillOut->fields[pos].name.empty()) { IndexBoundsBuilder::translate(expr, keyElt, index, oil, &scanState->tightness); } else { if (MatchExpression::AND == mergeType) { IndexBoundsBuilder::translateAndIntersect( expr, keyElt, index, oil, &scanState->tightness); } else { verify(MatchExpression::OR == mergeType); IndexBoundsBuilder::translateAndUnion(expr, keyElt, index, oil, &scanState->tightness); } } } void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntry& index) { TextNode* tn = static_cast(node); // If there's no prefix, the filter is already on the node and the index prefix is null. // We can just return. if (!tn->numPrefixFields) { return; } // We can't create a text stage if there aren't EQ predicates on its prefix terms. So // if we've made it this far, we should have collected the prefix predicates in the // filter. invariant(NULL != tn->filter.get()); MatchExpression* textFilterMe = tn->filter.get(); BSONObjBuilder prefixBob; if (MatchExpression::AND != textFilterMe->matchType()) { // Only one prefix term. invariant(1u == tn->numPrefixFields); // Sanity check: must be an EQ. invariant(MatchExpression::EQ == textFilterMe->matchType()); EqualityMatchExpression* eqExpr = static_cast(textFilterMe); prefixBob.append(eqExpr->getData()); tn->filter.reset(); } else { invariant(MatchExpression::AND == textFilterMe->matchType()); // Indexed by the keyPattern position index assignment. We want to add // prefixes in order but we must order them first. 
vector prefixExprs(tn->numPrefixFields, nullptr); AndMatchExpression* amExpr = static_cast(textFilterMe); invariant(amExpr->numChildren() >= tn->numPrefixFields); // Look through the AND children. The prefix children we want to // stash in prefixExprs. size_t curChild = 0; while (curChild < amExpr->numChildren()) { MatchExpression* child = amExpr->getChild(curChild); IndexTag* ixtag = static_cast(child->getTag()); invariant(NULL != ixtag); // Skip this child if it's not part of a prefix, or if we've already assigned a // predicate to this prefix position. if (ixtag->pos >= tn->numPrefixFields || prefixExprs[ixtag->pos] != NULL) { ++curChild; continue; } // prefixExprs takes ownership of 'child'. prefixExprs[ixtag->pos] = child; amExpr->getChildVector()->erase(amExpr->getChildVector()->begin() + curChild); // Don't increment curChild. } // Go through the prefix equalities in order and create an index prefix out of them. for (size_t i = 0; i < prefixExprs.size(); ++i) { MatchExpression* prefixMe = prefixExprs[i]; invariant(NULL != prefixMe); invariant(MatchExpression::EQ == prefixMe->matchType()); EqualityMatchExpression* eqExpr = static_cast(prefixMe); prefixBob.append(eqExpr->getData()); // We removed this from the AND expression that owned it, so we must clean it // up ourselves. delete prefixMe; } // Clear out an empty $and. if (0 == amExpr->numChildren()) { tn->filter.reset(); } else if (1 == amExpr->numChildren()) { // Clear out unsightly only child of $and MatchExpression* child = amExpr->getChild(0); amExpr->getChildVector()->clear(); // Deletes current filter which is amExpr. 
tn->filter.reset(child); } } tn->indexPrefix = prefixBob.obj(); } bool QueryPlannerAccess::orNeedsFetch(const ScanBuildingState* scanState) { if (scanState->loosestBounds == IndexBoundsBuilder::EXACT) { return false; } else if (scanState->loosestBounds == IndexBoundsBuilder::INEXACT_FETCH) { return true; } else { invariant(scanState->loosestBounds == IndexBoundsBuilder::INEXACT_COVERED); const IndexEntry& index = scanState->indices[scanState->currentIndexNumber]; return index.multikey; } } void QueryPlannerAccess::finishAndOutputLeaf(ScanBuildingState* scanState, vector>* out) { finishLeafNode(scanState->currentScan.get(), scanState->indices[scanState->currentIndexNumber]); if (MatchExpression::OR == scanState->root->matchType()) { if (orNeedsFetch(scanState)) { // In order to correctly evaluate the predicates for this index, we have to // fetch the full documents. Add a fetch node above the index scan whose filter // includes *all* of the predicates used to generate the ixscan. auto fetch = stdx::make_unique(); // Takes ownership. fetch->filter = std::move(scanState->curOr); // Takes ownership. fetch->children.push_back(scanState->currentScan.release()); scanState->currentScan = std::move(fetch); } else if (scanState->loosestBounds == IndexBoundsBuilder::INEXACT_COVERED) { // This an OR, at least one of the predicates used to generate 'currentScan' // is inexact covered, but none is inexact fetch. This means that we can put // these predicates, joined by an $or, as filters on the index scan. This avoids // a fetch and allows the predicates to be covered by the index. // // Ex. // Say we have index {a: 1} and query {$or: [{a: /foo/}, {a: /bar/}]}. // The entire query, {$or: [{a: /foo/}, {a: /bar/}]}, should be a filter // in the index scan stage itself. 
scanState->currentScan->filter = std::move(scanState->curOr); } } out->push_back(std::move(scanState->currentScan)); } void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntry& index) { const StageType type = node->getType(); if (STAGE_TEXT == type) { return finishTextNode(node, index); } IndexEntry* nodeIndex = nullptr; IndexBounds* bounds = nullptr; if (STAGE_GEO_NEAR_2D == type) { GeoNear2DNode* gnode = static_cast(node); bounds = &gnode->baseBounds; nodeIndex = &gnode->index; } else if (STAGE_GEO_NEAR_2DSPHERE == type) { GeoNear2DSphereNode* gnode = static_cast(node); bounds = &gnode->baseBounds; nodeIndex = &gnode->index; } else { verify(type == STAGE_IXSCAN); IndexScanNode* scan = static_cast(node); nodeIndex = &scan->index; bounds = &scan->bounds; // If this is a $** index, update and populate the keyPattern, bounds, and multikeyPaths. if (index.type == IndexType::INDEX_WILDCARD) { wcp::finalizeWildcardIndexScanConfiguration(scan); } } // Find the first field in the scan's bounds that was not filled out. // TODO: could cache this. size_t firstEmptyField = 0; for (firstEmptyField = 0; firstEmptyField < bounds->fields.size(); ++firstEmptyField) { if (bounds->fields[firstEmptyField].name.empty()) { verify(bounds->fields[firstEmptyField].intervals.empty()); break; } } // All fields are filled out with bounds, nothing to do. if (firstEmptyField == bounds->fields.size()) { return IndexBoundsBuilder::alignBounds(bounds, nodeIndex->keyPattern); } // Skip ahead to the firstEmptyField-th element, where we begin filling in bounds. BSONObjIterator it(nodeIndex->keyPattern); for (size_t i = 0; i < firstEmptyField; ++i) { verify(it.more()); it.next(); } // For each field in the key... while (it.more()) { BSONElement kpElt = it.next(); // There may be filled-in fields to the right of the firstEmptyField; for instance, the // index {loc:"2dsphere", x:1} with a predicate over x and a near search over loc. 
if (bounds->fields[firstEmptyField].name.empty()) { verify(bounds->fields[firstEmptyField].intervals.empty()); IndexBoundsBuilder::allValuesForField(kpElt, &bounds->fields[firstEmptyField]); } ++firstEmptyField; } // Make sure that the length of the key is the length of the bounds we started. verify(firstEmptyField == bounds->fields.size()); // We create bounds assuming a forward direction but can easily reverse bounds to align // according to our desired direction. IndexBoundsBuilder::alignBounds(bounds, nodeIndex->keyPattern); } void QueryPlannerAccess::findElemMatchChildren(const MatchExpression* node, vector* out, vector* subnodesOut) { for (size_t i = 0; i < node->numChildren(); ++i) { MatchExpression* child = node->getChild(i); if (Indexability::isBoundsGenerating(child) && NULL != child->getTag()) { out->push_back(child); } else if (MatchExpression::AND == child->matchType() || Indexability::arrayUsesIndexOnChildren(child)) { findElemMatchChildren(child, out, subnodesOut); } else if (NULL != child->getTag()) { subnodesOut->push_back(child); } } } std::vector> QueryPlannerAccess::collapseEquivalentScans( std::vector> scans) { invariant(scans.size() > 0); // Scans that need to be collapsed will be adjacent to each other in the list due to how we // sort the query predicate. We step through the list, either merging the current scan into // the last scan in 'collapsedScans', or adding a new entry to 'collapsedScans' if it can't // be merged. std::vector> collapsedScans; collapsedScans.push_back(std::move(scans[0])); for (size_t i = 1; i < scans.size(); ++i) { if (scansAreEquivalent(collapsedScans.back().get(), scans[i].get())) { // We collapse the entry from 'ownedScans' into the back of 'collapsedScans'. 
std::unique_ptr collapseFrom = std::move(scans[i]); FetchNode* collapseFromFetch = getFetchNode(collapseFrom.get()); FetchNode* collapseIntoFetch = getFetchNode(collapsedScans.back().get()); // If there's no filter associated with a fetch node on 'collapseFrom', all we have to // do is clear the filter on the node that we are collapsing into. if (!collapseFromFetch || !collapseFromFetch->filter.get()) { if (collapseIntoFetch) { collapseIntoFetch->filter.reset(); } continue; } // If there's no filter associated with a fetch node on the back of the 'collapsedScans' // list, then there's nothing more to do. if (!collapseIntoFetch || !collapseIntoFetch->filter.get()) { continue; } // Both the 'from' and 'into' nodes have filters. We join them with an // OrMatchExpression. std::unique_ptr collapsedFilter = stdx::make_unique(); collapsedFilter->add(collapseFromFetch->filter.release()); collapsedFilter->add(collapseIntoFetch->filter.release()); // Normalize the filter and add it to 'into'. collapseIntoFetch->filter = MatchExpression::optimize(std::move(collapsedFilter)); } else { // Scans are not equivalent and can't be collapsed. collapsedScans.push_back(std::move(scans[i])); } } invariant(collapsedScans.size() > 0); return collapsedScans; } bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query, MatchExpression* root, bool inArrayOperator, const std::vector& indices, const QueryPlannerParams& params, std::vector>* out) { // Initialize the ScanBuildingState. ScanBuildingState scanState(root, inArrayOperator, indices); while (scanState.curChild < root->numChildren()) { MatchExpression* child = root->getChild(scanState.curChild); // If there is no tag, it's not using an index. We've sorted our children such that the // children with tags are first, so we stop now. if (NULL == child->getTag()) { break; } scanState.ixtag = static_cast(child->getTag()); // If there's a tag it must be valid. 
verify(IndexTag::kNoIndex != scanState.ixtag->index); // If the child can't use an index on its own field (and the child is not a negation // of a bounds-generating expression), then it's indexed by virtue of one of // its children having an index. // // NOTE: If the child is logical, it could possibly collapse into a single ixscan. we // ignore this for now. if (!Indexability::isBoundsGenerating(child)) { // If we're here, then the child is indexed by virtue of its children. // In most cases this means that we recursively build indexed data // access on 'child'. if (!processIndexScansSubnode(query, &scanState, params, out)) { return false; } continue; } // If we're here, we now know that 'child' can use an index directly and the index is // over the child's field. // If 'child' is a NOT, then the tag we're interested in is on the NOT's // child node. if (MatchExpression::NOT == child->matchType()) { scanState.ixtag = static_cast(child->getChild(0)->getTag()); invariant(IndexTag::kNoIndex != scanState.ixtag->index); } // If the child we're looking at uses a different index than the current index scan, add // the current index scan to the output as we're done with it. The index scan created // by the child then becomes our new current index scan. Note that the current scan // could be NULL, in which case we don't output it. The rest of the logic is identical. // // If the child uses the same index as the current index scan, we may be able to merge // the bounds for the two scans. // // Guiding principle: must the values we're testing come from the same array in the // document? If so, we can combine bounds (via intersection or compounding). If not, // we can't. // // If the index is NOT multikey, it's always semantically correct to combine bounds, // as there are no arrays to worry about. // // If the index is multikey, there are arrays of values. 
There are several // complications in the multikey case that have to be obeyed both by the enumerator // and here as we try to merge predicates into query solution leaves. The hairy // details of these rules are documented near the top of planner_access.h. if (shouldMergeWithLeaf(child, scanState)) { // The child uses the same index we're currently building a scan for. Merge // the bounds and filters. verify(scanState.currentIndexNumber == scanState.ixtag->index); scanState.tightness = IndexBoundsBuilder::INEXACT_FETCH; mergeWithLeafNode(child, &scanState); handleFilter(&scanState); } else { if (NULL != scanState.currentScan.get()) { // Output the current scan before starting to construct a new out. finishAndOutputLeaf(&scanState, out); } else { verify(IndexTag::kNoIndex == scanState.currentIndexNumber); } // Reset state before producing a new leaf. scanState.resetForNextScan(scanState.ixtag); scanState.currentScan = makeLeafNode(query, indices[scanState.currentIndexNumber], scanState.ixtag->pos, child, &scanState.tightness); handleFilter(&scanState); } } // Output the scan we're done with, if it exists. if (NULL != scanState.currentScan.get()) { finishAndOutputLeaf(&scanState, out); } return true; } bool QueryPlannerAccess::processIndexScansElemMatch( const CanonicalQuery& query, ScanBuildingState* scanState, const QueryPlannerParams& params, std::vector>* out) { MatchExpression* root = scanState->root; MatchExpression* child = root->getChild(scanState->curChild); const vector& indices = scanState->indices; // We have an AND with an ELEM_MATCH_OBJECT child. The plan enumerator produces // index taggings which indicate that we should try to compound with // predicates retrieved from inside the subtree rooted at the ELEM_MATCH. // In order to obey the enumerator's tagging, we need to retrieve these // predicates from inside the $elemMatch, and try to merge them with // the current index scan. 
// Contains tagged predicates from inside the tree rooted at 'child' // which are logically part of the AND. vector emChildren; // Contains tagged nodes that are not logically part of the AND and // cannot use the index directly (e.g. OR nodes which are tagged to // be indexed). vector emSubnodes; // Populate 'emChildren' and 'emSubnodes'. findElemMatchChildren(child, &emChildren, &emSubnodes); // Recursively build data access for the nodes inside 'emSubnodes'. for (size_t i = 0; i < emSubnodes.size(); ++i) { MatchExpression* subnode = emSubnodes[i]; if (!Indexability::isBoundsGenerating(subnode)) { // 'subnode' is beneath an $elemMatch. When planning the children of array operators, we // keep ownership of the match expression node. Therefore, we pass nullptr for the // 'ownedRoot' argument. auto childSolution = _buildIndexedDataAccess(query, subnode, nullptr, indices, params); // _buildIndexedDataAccess(...) returns NULL in error conditions, when it is unable to // construct a query solution from a tagged match expression tree. If we are unable to // construct a solution according to the instructions from the enumerator, then we bail // out early (by returning false) rather than continuing on and potentially constructing // an invalid solution tree. if (!childSolution) { return false; } // Output the resulting solution tree. out->push_back(std::move(childSolution)); } } // For each predicate in 'emChildren', try to merge it with the current index scan. // // This loop is similar to that in processIndexScans(...), except it does not call into // handleFilters(...). Instead, we leave the entire $elemMatch filter intact. This way, // the complete $elemMatch expression will be affixed as a filter later on. 
for (size_t i = 0; i < emChildren.size(); ++i) { MatchExpression* emChild = emChildren[i]; invariant(NULL != emChild->getTag()); scanState->ixtag = static_cast(emChild->getTag()); // If 'emChild' is a NOT, then the tag we're interested in is on the NOT's // child node. if (MatchExpression::NOT == emChild->matchType()) { invariant(NULL != emChild->getChild(0)->getTag()); scanState->ixtag = static_cast(emChild->getChild(0)->getTag()); invariant(IndexTag::kNoIndex != scanState->ixtag->index); } if (shouldMergeWithLeaf(emChild, *scanState)) { // The child uses the same index we're currently building a scan for. Merge // the bounds and filters. verify(scanState->currentIndexNumber == scanState->ixtag->index); scanState->tightness = IndexBoundsBuilder::INEXACT_FETCH; mergeWithLeafNode(emChild, scanState); } else { if (NULL != scanState->currentScan.get()) { finishAndOutputLeaf(scanState, out); } else { verify(IndexTag::kNoIndex == scanState->currentIndexNumber); } scanState->currentIndexNumber = scanState->ixtag->index; scanState->tightness = IndexBoundsBuilder::INEXACT_FETCH; scanState->currentScan = makeLeafNode(query, indices[scanState->currentIndexNumber], scanState->ixtag->pos, emChild, &scanState->tightness); } } // We're done processing the $elemMatch child. We leave it hanging off // it's AND parent so that it will be affixed as a filter later on, // and move on to the next child of the AND. ++scanState->curChild; return true; } bool QueryPlannerAccess::processIndexScansSubnode( const CanonicalQuery& query, ScanBuildingState* scanState, const QueryPlannerParams& params, std::vector>* out) { MatchExpression* root = scanState->root; MatchExpression* child = root->getChild(scanState->curChild); const vector& indices = scanState->indices; bool inArrayOperator = scanState->inArrayOperator; // We may detach the current child from the tree and assume ownership. 
std::unique_ptr ownedChild; if (MatchExpression::AND == root->matchType() && MatchExpression::ELEM_MATCH_OBJECT == child->matchType()) { return processIndexScansElemMatch(query, scanState, params, out); } else if (!inArrayOperator) { // The logical sub-tree is responsible for fully evaluating itself. Any required filters or // fetches are already hung on it. As such, we remove the filter branch from our tree and // assume ownership of it. root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild); ownedChild.reset(child); } else { ++scanState->curChild; } // If inArrayOperator: takes ownership of child, which is OK, since we detached // child from root. auto childSolution = _buildIndexedDataAccess(query, child, std::move(ownedChild), indices, params); if (!childSolution) { return false; } out->push_back(std::move(childSolution)); return true; } std::unique_ptr QueryPlannerAccess::buildIndexedAnd( const CanonicalQuery& query, MatchExpression* root, std::unique_ptr ownedRoot, const vector& indices, const QueryPlannerParams& params) { // Clone the match expression before passing it to processIndexScans(), as it may trim // predicates. If we end up with an index intersection solution, then we use our copy of the // match expression to be sure that the FETCH stage will recheck the entire predicate. It is not // correct to trim predicates for index intersection plans, as this can lead to spurious matches // (see SERVER-16750). auto clonedRoot = root->shallowClone(); std::vector> ixscanNodes; const bool inArrayOperator = !ownedRoot; if (!processIndexScans(query, root, inArrayOperator, indices, params, &ixscanNodes)) { return NULL; } // // Process all non-indexed predicates. We hang these above the AND with a fetch and // filter. // // This is the node we're about to return. std::unique_ptr andResult; // We must use an index for at least one child of the AND. We shouldn't be here if this // isn't the case. 
verify(ixscanNodes.size() >= 1); // Short-circuit: an AND of one child is just the child. if (ixscanNodes.size() == 1) { andResult = std::move(ixscanNodes[0]); } else { // $** indexes are prohibited from participating in either AND_SORTED or AND_HASH. const bool wildcardIndexInvolvedInIntersection = std::any_of(ixscanNodes.begin(), ixscanNodes.end(), [](const auto& ixScan) { return ixScan->getType() == StageType::STAGE_IXSCAN && static_cast(ixScan.get())->index.type == INDEX_WILDCARD; }); if (wildcardIndexInvolvedInIntersection) { return nullptr; } // Figure out if we want AndHashNode or AndSortedNode. bool allSortedByDiskLoc = true; for (size_t i = 0; i < ixscanNodes.size(); ++i) { if (!ixscanNodes[i]->sortedByDiskLoc()) { allSortedByDiskLoc = false; break; } } if (allSortedByDiskLoc) { auto asn = stdx::make_unique(); asn->addChildren(std::move(ixscanNodes)); andResult = std::move(asn); } else if (internalQueryPlannerEnableHashIntersection.load()) { { auto ahn = stdx::make_unique(); ahn->addChildren(std::move(ixscanNodes)); andResult = std::move(ahn); } // The AndHashNode provides the sort order of its last child. If any of the // possible subnodes of AndHashNode provides the sort order we care about, we put // that one last. for (size_t i = 0; i < andResult->children.size(); ++i) { andResult->children[i]->computeProperties(); const BSONObjSet& sorts = andResult->children[i]->getSort(); if (sorts.end() != sorts.find(query.getQueryRequest().getSort())) { std::swap(andResult->children[i], andResult->children.back()); break; } } } else { // We can't use sort-based intersection, and hash-based intersection is disabled. // Clean up the index scans and bail out by returning NULL. LOG(5) << "Can't build index intersection solution: " << "AND_SORTED is not possible and AND_HASH is disabled."; return nullptr; } } // Don't bother doing any kind of fetch analysis lite if we're doing it anyway above us. 
if (inArrayOperator) { return andResult; } if (andResult->getType() == STAGE_AND_HASH || andResult->getType() == STAGE_AND_SORTED) { // We got an index intersection solution, so we aren't allowed to answer predicates exactly // using the index. This is because the index intersection stage finds documents that match // each index's predicate, but the document isn't guaranteed to be in a state where it // matches all indexed predicates simultaneously. Therefore, it is necessary to add a fetch // stage which will explicitly evaluate the entire predicate (see SERVER-16750). invariant(clonedRoot); auto fetch = stdx::make_unique(); fetch->filter = std::move(clonedRoot); // Takes ownership of 'andResult'. fetch->children.push_back(andResult.release()); return std::move(fetch); } // If there are any nodes still attached to the AND, we can't answer them using the // index, so we put a fetch with filter. if (root->numChildren() > 0) { auto fetch = stdx::make_unique(); verify(ownedRoot); if (ownedRoot->numChildren() == 1) { // An $and of one thing is that thing. MatchExpression* child = ownedRoot->getChild(0); ownedRoot->getChildVector()->clear(); // Takes ownership. fetch->filter.reset(child); // 'autoRoot' will delete the empty $and. } else { // root->numChildren() > 1 // Takes ownership. fetch->filter = std::move(ownedRoot); } // takes ownership fetch->children.push_back(andResult.release()); andResult = std::move(fetch); } else { // root has no children, let autoRoot get rid of it when it goes out of scope. } return andResult; } std::unique_ptr QueryPlannerAccess::buildIndexedOr( const CanonicalQuery& query, MatchExpression* root, std::unique_ptr ownedRoot, const vector& indices, const QueryPlannerParams& params) { const bool inArrayOperator = !ownedRoot; std::vector> ixscanNodes; if (!processIndexScans(query, root, inArrayOperator, indices, params, &ixscanNodes)) { return NULL; } // Unlike an AND, an OR cannot have filters hanging off of it. 
We stop processing // when any of our children lack index tags. If a node lacks an index tag it cannot // be answered via an index. if (!inArrayOperator && 0 != root->numChildren()) { warning() << "planner OR error, non-indexed child of OR."; // We won't enumerate an OR without indices for each child, so this isn't an issue, even // if we have an AND with an OR child -- we won't get here unless the OR is fully // indexed. return NULL; } // If all index scans are identical, then we collapse them into a single scan. This prevents // us from creating OR plans where the branches of the OR perform duplicate work. ixscanNodes = collapseEquivalentScans(std::move(ixscanNodes)); std::unique_ptr orResult; // An OR of one node is just that node. if (1 == ixscanNodes.size()) { orResult = std::move(ixscanNodes[0]); } else { std::vector shouldReverseScan; if (!query.getQueryRequest().getSort().isEmpty()) { // If all ixscanNodes can provide the sort, shouldReverseScan is populated with which // scans to reverse. shouldReverseScan = canProvideSortWithMergeSort(ixscanNodes, query.getQueryRequest().getSort()); } if (!shouldReverseScan.empty()) { // Each node can provide either the requested sort, or the reverse of the requested // sort. invariant(ixscanNodes.size() == shouldReverseScan.size()); for (size_t i = 0; i < ixscanNodes.size(); ++i) { if (shouldReverseScan[i]) { QueryPlannerCommon::reverseScans(ixscanNodes[i].get()); } } auto msn = stdx::make_unique(); msn->sort = query.getQueryRequest().getSort(); msn->addChildren(std::move(ixscanNodes)); orResult = std::move(msn); } else { auto orn = stdx::make_unique(); orn->addChildren(std::move(ixscanNodes)); orResult = std::move(orn); } } // Evaluate text nodes first to ensure that text scores are available. // Move text nodes to front of vector. 
std::stable_partition(orResult->children.begin(), orResult->children.end(), isTextNode); // OR must have an index for each child, so we should have detached all children from // 'root', and there's nothing useful to do with an empty or MatchExpression. We let it die // via autoRoot. return orResult; } std::unique_ptr QueryPlannerAccess::buildIndexedDataAccess( const CanonicalQuery& query, std::unique_ptr root, const vector& indices, const QueryPlannerParams& params) { MatchExpression* unownedRoot = root.get(); return _buildIndexedDataAccess(query, unownedRoot, std::move(root), indices, params); } std::unique_ptr QueryPlannerAccess::_buildIndexedDataAccess( const CanonicalQuery& query, MatchExpression* root, std::unique_ptr ownedRoot, const vector& indices, const QueryPlannerParams& params) { if (root->getCategory() == MatchExpression::MatchCategory::kLogical && !Indexability::isBoundsGeneratingNot(root)) { if (MatchExpression::AND == root->matchType()) { // Takes ownership of root. return buildIndexedAnd(query, root, std::move(ownedRoot), indices, params); } else if (MatchExpression::OR == root->matchType()) { // Takes ownership of root. return buildIndexedOr(query, root, std::move(ownedRoot), indices, params); } else { return nullptr; } } else { if (!root->getTag()) { // No index to use here, not in the context of logical operator, so we're SOL. return nullptr; } else if (Indexability::isBoundsGenerating(root)) { // Make an index scan over the tagged index #. IndexTag* tag = static_cast(root->getTag()); IndexBoundsBuilder::BoundsTightness tightness = IndexBoundsBuilder::EXACT; auto soln = makeLeafNode(query, indices[tag->index], tag->pos, root, &tightness); verify(NULL != soln); finishLeafNode(soln.get(), indices[tag->index]); if (!ownedRoot) { // We're performing access planning for the child of an array operator such as // $elemMatch value. 
return soln; } // If the bounds are exact, the set of documents that satisfy the predicate is // exactly equal to the set of documents that the scan provides. // // If the bounds are not exact, the set of documents returned from the scan is a // superset of documents that satisfy the predicate, and we must check the // predicate. if (tightness == IndexBoundsBuilder::EXACT) { return soln; } else if (tightness == IndexBoundsBuilder::INEXACT_COVERED && !indices[tag->index].multikey) { verify(NULL == soln->filter.get()); soln->filter = std::move(ownedRoot); return soln; } else { auto fetch = stdx::make_unique(); fetch->filter = std::move(ownedRoot); fetch->children.push_back(soln.release()); return std::move(fetch); } } else if (Indexability::arrayUsesIndexOnChildren(root)) { std::unique_ptr solution; invariant(root->matchType() == MatchExpression::ELEM_MATCH_OBJECT); // The child is an AND. invariant(1 == root->numChildren()); // Recursively build a data access plan for the child of the $elemMatch object. We // maintain ownership of 'ownedRoot'. solution = _buildIndexedDataAccess(query, root->getChild(0), nullptr, indices, params); if (!solution) { return nullptr; } // There may be an array operator above us. if (!ownedRoot) { return solution; } auto fetch = stdx::make_unique(); fetch->filter = std::move(ownedRoot); fetch->children.push_back(solution.release()); return std::move(fetch); } } return nullptr; } std::unique_ptr QueryPlannerAccess::scanWholeIndex( const IndexEntry& index, const CanonicalQuery& query, const QueryPlannerParams& params, int direction) { std::unique_ptr solnRoot; // Build an ixscan over the id index, use it, and return it. 
unique_ptr isn = make_unique(index); isn->addKeyMetadata = query.getQueryRequest().returnKey(); isn->queryCollator = query.getCollator(); IndexBoundsBuilder::allValuesBounds(index.keyPattern, &isn->bounds); if (-1 == direction) { QueryPlannerCommon::reverseScans(isn.get()); isn->direction = -1; } unique_ptr filter = query.root()->shallowClone(); // If it's find({}) remove the no-op root. if (MatchExpression::AND == filter->matchType() && (0 == filter->numChildren())) { solnRoot = std::move(isn); } else { // TODO: We may not need to do the fetch if the predicates in root are covered. But // for now it's safe (though *maybe* slower). unique_ptr fetch = make_unique(); fetch->filter = std::move(filter); fetch->children.push_back(isn.release()); solnRoot = std::move(fetch); } return solnRoot; } void QueryPlannerAccess::addFilterToSolutionNode(QuerySolutionNode* node, MatchExpression* match, MatchExpression::MatchType type) { if (NULL == node->filter) { node->filter.reset(match); } else if (type == node->filter->matchType()) { // The 'node' already has either an AND or OR filter that matches 'type'. Add 'match' as // another branch of the filter. ListOfMatchExpression* listFilter = static_cast(node->filter.get()); listFilter->add(match); } else { // The 'node' already has a filter that does not match 'type'. If 'type' is AND, then // combine 'match' with the existing filter by adding an AND. If 'type' is OR, combine // by adding an OR node. 
unique_ptr listFilter; if (MatchExpression::AND == type) { listFilter = make_unique(); } else { verify(MatchExpression::OR == type); listFilter = make_unique(); } unique_ptr oldFilter = node->filter->shallowClone(); listFilter->add(oldFilter.release()); listFilter->add(match); node->filter = std::move(listFilter); } } void QueryPlannerAccess::handleFilter(ScanBuildingState* scanState) { if (MatchExpression::OR == scanState->root->matchType()) { handleFilterOr(scanState); } else if (MatchExpression::AND == scanState->root->matchType()) { handleFilterAnd(scanState); } else { // We must be building leaves for either and AND or an OR. MONGO_UNREACHABLE; } } void QueryPlannerAccess::handleFilterOr(ScanBuildingState* scanState) { MatchExpression* root = scanState->root; MatchExpression* child = root->getChild(scanState->curChild); if (scanState->inArrayOperator) { // We're inside an array operator. The entire array operator expression // should always be affixed as a filter. We keep 'curChild' in the $and // for affixing later. ++scanState->curChild; } else { if (scanState->tightness < scanState->loosestBounds) { scanState->loosestBounds = scanState->tightness; } // Detach 'child' and add it to 'curOr'. root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild); scanState->curOr->getChildVector()->push_back(child); } } void QueryPlannerAccess::handleFilterAnd(ScanBuildingState* scanState) { MatchExpression* root = scanState->root; MatchExpression* child = root->getChild(scanState->curChild); const IndexEntry& index = scanState->indices[scanState->currentIndexNumber]; if (scanState->inArrayOperator) { // We're inside an array operator. The entire array operator expression // should always be affixed as a filter. We keep 'curChild' in the $and // for affixing later. 
++scanState->curChild; } else if (scanState->tightness == IndexBoundsBuilder::EXACT) { root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild); delete child; } else if (scanState->tightness == IndexBoundsBuilder::INEXACT_COVERED && (INDEX_TEXT == index.type || !index.multikey)) { // The bounds are not exact, but the information needed to // evaluate the predicate is in the index key. Remove the // MatchExpression from its parent and attach it to the filter // of the index scan we're building. // // We can only use this optimization if the index is NOT multikey. // Suppose that we had the multikey index {x: 1} and a document // {x: ["a", "b"]}. Now if we query for {x: /b/} the filter might // ever only be applied to the index key "a". We'd incorrectly // conclude that the document does not match the query :( so we // gotta stick to non-multikey indices. root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild); addFilterToSolutionNode(scanState->currentScan.get(), child, root->matchType()); } else { // We keep curChild in the AND for affixing later. ++scanState->curChild; } } std::unique_ptr QueryPlannerAccess::makeIndexScan( const IndexEntry& index, const CanonicalQuery& query, const QueryPlannerParams& params, const BSONObj& startKey, const BSONObj& endKey) { std::unique_ptr solnRoot; // Build an ixscan over the id index, use it, and return it. auto isn = stdx::make_unique(index); isn->direction = 1; isn->addKeyMetadata = query.getQueryRequest().returnKey(); isn->bounds.isSimpleRange = true; isn->bounds.startKey = startKey; isn->bounds.endKey = endKey; isn->bounds.boundInclusion = BoundInclusion::kIncludeStartKeyOnly; isn->queryCollator = query.getCollator(); unique_ptr filter = query.root()->shallowClone(); // If it's find({}) remove the no-op root. 
if (MatchExpression::AND == filter->matchType() && (0 == filter->numChildren())) { solnRoot = std::move(isn); } else { // TODO: We may not need to do the fetch if the predicates in root are covered. But // for now it's safe (though *maybe* slower). unique_ptr fetch = make_unique(); fetch->filter = std::move(filter); fetch->children.push_back(isn.release()); solnRoot = std::move(fetch); } return solnRoot; } } // namespace mongo