/**
 * Copyright (C) 2018-present MongoDB, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the Server Side Public License, version 1,
 * as published by MongoDB, Inc.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * Server Side Public License for more details.
 *
 * You should have received a copy of the Server Side Public License
 * along with this program. If not, see
 * <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 * As a special exception, the copyright holders give permission to link the
 * code of portions of this program with the OpenSSL library under certain
 * conditions as described in each individual source file and distribute
 * linked combinations including the program with the OpenSSL library. You
 * must comply with the Server Side Public License in all respects for
 * all of the code used other than as permitted herein. If you modify file(s)
 * with this exception, you may extend this exception to your version of the
 * file(s), but you are not obligated to do so. If you do not wish to do so,
 * delete this exception statement from your version. If you delete this
 * exception statement from all source files in the program, then also delete
 * it in the license file.
*/ #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery #include "mongo/platform/basic.h" #include "mongo/db/query/planner_access.h" #include #include #include "mongo/base/owned_pointer_vector.h" #include "mongo/bson/simple_bsonobj_comparator.h" #include "mongo/db/bson/dotted_path_support.h" #include "mongo/db/fts/fts_index_format.h" #include "mongo/db/fts/fts_query_noop.h" #include "mongo/db/fts/fts_spec.h" #include "mongo/db/matcher/expression_array.h" #include "mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/expression_text.h" #include "mongo/db/query/index_bounds_builder.h" #include "mongo/db/query/index_tag.h" #include "mongo/db/query/indexability.h" #include "mongo/db/query/planner_wildcard_helpers.h" #include "mongo/db/query/query_knobs_gen.h" #include "mongo/db/query/query_planner.h" #include "mongo/db/query/query_planner_common.h" #include "mongo/db/record_id_helpers.h" #include "mongo/logv2/log.h" #include "mongo/util/transitional_tools_do_not_use/vector_spooling.h" namespace { using namespace mongo; namespace wcp = ::mongo::wildcard_planning; namespace dps = ::mongo::dotted_path_support; /** * Text node functors. */ bool isTextNode(const QuerySolutionNode* node) { return STAGE_TEXT_MATCH == node->getType(); } /** * Casts 'node' to a FetchNode* if it is a FetchNode, otherwise returns null. */ FetchNode* getFetchNode(QuerySolutionNode* node) { if (STAGE_FETCH != node->getType()) { return nullptr; } return static_cast(node); } /** * If 'node' is an index scan node, casts it to IndexScanNode*. If 'node' is a FetchNode with an * IndexScanNode child, then returns a pointer to the child index scan node. Otherwise returns * null. 
*/ const IndexScanNode* getIndexScanNode(const QuerySolutionNode* node) { if (STAGE_IXSCAN == node->getType()) { return static_cast(node); } else if (STAGE_FETCH == node->getType()) { invariant(1U == node->children.size()); const QuerySolutionNode* child = node->children[0]; if (STAGE_IXSCAN == child->getType()) { return static_cast(child); } } return nullptr; } /** * Takes as input two query solution nodes returned by processIndexScans(). If both are * IndexScanNode or FetchNode with an IndexScanNode child and the index scan nodes are identical * (same bounds, same filter, same direction, etc.), then returns true. Otherwise returns false. */ bool scansAreEquivalent(const QuerySolutionNode* lhs, const QuerySolutionNode* rhs) { const IndexScanNode* leftIxscan = getIndexScanNode(lhs); const IndexScanNode* rightIxscan = getIndexScanNode(rhs); if (!leftIxscan || !rightIxscan) { return false; } return *leftIxscan == *rightIxscan; } /** * If all nodes can provide the requested sort, returns a vector expressing which nodes must have * their index scans reversed to provide the sort. Otherwise, returns an empty vector. * 'nodes' must not be empty. */ std::vector canProvideSortWithMergeSort( const std::vector>& nodes, const BSONObj& requestedSort) { invariant(!nodes.empty()); std::vector shouldReverseScan; const auto reverseSort = QueryPlannerCommon::reverseSortObj(requestedSort); for (auto&& node : nodes) { node->computeProperties(); if (node->providedSorts().contains(requestedSort)) { shouldReverseScan.push_back(false); } else if (node->providedSorts().contains(reverseSort)) { shouldReverseScan.push_back(true); } else { return {}; } } return shouldReverseScan; } } // namespace namespace mongo { using std::unique_ptr; using std::vector; namespace { /** * Extracts the lower and upper bounds on the "ts" field from 'me'. This only examines comparisons * of "ts" against a Timestamp at the top level or inside a top-level $and. 
*/ std::pair, boost::optional> extractTsRange( const MatchExpression* me, bool topLevel = true) { boost::optional min; boost::optional max; if (me->matchType() == MatchExpression::AND && topLevel) { for (size_t i = 0; i < me->numChildren(); ++i) { boost::optional childMin; boost::optional childMax; std::tie(childMin, childMax) = extractTsRange(me->getChild(i), false); if (childMin && (!min || childMin.get() > min.get())) { min = childMin; } if (childMax && (!max || childMax.get() < max.get())) { max = childMax; } } return {min, max}; } if (!ComparisonMatchExpression::isComparisonMatchExpression(me) || me->path() != repl::OpTime::kTimestampFieldName) { return {min, max}; } auto rawElem = static_cast(me)->getData(); if (rawElem.type() != BSONType::bsonTimestamp) { return {min, max}; } switch (me->matchType()) { case MatchExpression::EQ: min = rawElem.timestamp(); max = rawElem.timestamp(); return {min, max}; case MatchExpression::GT: case MatchExpression::GTE: min = rawElem.timestamp(); return {min, max}; case MatchExpression::LT: case MatchExpression::LTE: max = rawElem.timestamp(); return {min, max}; default: MONGO_UNREACHABLE; } } /** * Returns true if 'me' is a GTE or GE predicate over the "ts" field. */ bool isOplogTsLowerBoundPred(const mongo::MatchExpression* me) { if (mongo::MatchExpression::GT != me->matchType() && mongo::MatchExpression::GTE != me->matchType()) { return false; } return me->path() == repl::OpTime::kTimestampFieldName; } /** * Helper function to add an RID range to collection scans. * If the query solution tree contains a collection scan node with a suitable comparison * predicate on '_id', we add a minRecord and maxRecord on the collection node. 
*/ void handleRIDRangeScan(const MatchExpression* conjunct, CollectionScanNode* collScan) { if (conjunct == nullptr) { return; } auto* andMatchPtr = dynamic_cast(conjunct); if (andMatchPtr != nullptr) { for (size_t index = 0; index < andMatchPtr->numChildren(); index++) { handleRIDRangeScan(andMatchPtr->getChild(index), collScan); } return; } if (conjunct->path() != "_id") { return; } const bool hasMaxRecord = collScan->maxRecord.has_value(); const bool hasMinRecord = collScan->minRecord.has_value(); if (!hasMinRecord && !hasMaxRecord) { if (auto eq = dynamic_cast(conjunct)) { collScan->minRecord = record_id_helpers::keyForElem(eq->getData()); collScan->maxRecord = collScan->minRecord; return; } } if (!hasMaxRecord) { if (auto ltConjunct = dynamic_cast(conjunct)) { collScan->maxRecord = record_id_helpers::keyForElem(ltConjunct->getData()); } else if (auto lteConjunct = dynamic_cast(conjunct)) { collScan->maxRecord = record_id_helpers::keyForElem(lteConjunct->getData()); } } if (!hasMinRecord) { if (auto gtConjunct = dynamic_cast(conjunct)) { collScan->minRecord = record_id_helpers::keyForElem(gtConjunct->getData()); } else if (auto gteConjunct = dynamic_cast(conjunct)) { collScan->minRecord = record_id_helpers::keyForElem(gteConjunct->getData()); } } } } // namespace std::unique_ptr QueryPlannerAccess::makeCollectionScan( const CanonicalQuery& query, bool tailable, const QueryPlannerParams& params) { // Make the (only) node, a collection scan. auto csn = std::make_unique(); csn->name = query.ns(); csn->filter = query.root()->shallowClone(); csn->tailable = tailable; csn->shouldTrackLatestOplogTimestamp = params.options & QueryPlannerParams::TRACK_LATEST_OPLOG_TS; csn->shouldWaitForOplogVisibility = params.options & QueryPlannerParams::OPLOG_SCAN_WAIT_FOR_VISIBLE; // If the hint is {$natural: +-1} this changes the direction of the collection scan. 
const BSONObj& hint = query.getFindCommandRequest().getHint(); if (!hint.isEmpty()) { BSONElement natural = hint[query_request_helper::kNaturalSortField]; if (natural) { csn->direction = natural.numberInt() >= 0 ? 1 : -1; } } // If the client requested a resume token and we are scanning the oplog, prepare // the collection scan to return timestamp-based tokens. Otherwise, we should // return generic RecordId-based tokens. if (query.getFindCommandRequest().getRequestResumeToken()) { csn->shouldTrackLatestOplogTimestamp = query.nss().isOplog(); csn->requestResumeToken = !query.nss().isOplog(); } // Extract and assign the RecordId from the 'resumeAfter' token, if present. const BSONObj& resumeAfterObj = query.getFindCommandRequest().getResumeAfter(); if (!resumeAfterObj.isEmpty()) { BSONElement recordIdElem = resumeAfterObj["$recordId"]; csn->resumeAfterRecordId = RecordId::deserializeToken(recordIdElem); } const bool assertMinTsHasNotFallenOffOplog = params.options & QueryPlannerParams::ASSERT_MIN_TS_HAS_NOT_FALLEN_OFF_OPLOG; if (query.nss().isOplog() && csn->direction == 1) { // Optimizes the start and end location parameters for a collection scan for an oplog // collection. Not compatible with $_resumeAfter so we do not optimize in that case. if (resumeAfterObj.isEmpty()) { auto [minTs, maxTs] = extractTsRange(query.root()); if (minTs) { StatusWith goal = record_id_helpers::keyForOptime(*minTs); if (goal.isOK()) { csn->minRecord = goal.getValue(); } if (assertMinTsHasNotFallenOffOplog) { csn->assertTsHasNotFallenOffOplog = *minTs; } } if (maxTs) { StatusWith goal = record_id_helpers::keyForOptime(*maxTs); if (goal.isOK()) { csn->maxRecord = goal.getValue(); } } } // If the query is just a lower bound on "ts" on a forward scan, every document in the // collection after the first matching one must also match. 
To avoid wasting time // running the match expression on every document to be returned, we tell the // CollectionScan stage to stop applying the filter once it finds the first match. if (isOplogTsLowerBoundPred(query.root())) { csn->stopApplyingFilterAfterFirstMatch = true; } } // The user may have requested 'assertMinTsHasNotFallenOffOplog' for a query that does not // specify a minimum timestamp. This is not a valid request, so we throw InvalidOptions. if (assertMinTsHasNotFallenOffOplog) { uassert(ErrorCodes::InvalidOptions, str::stream() << "assertTsHasNotFallenOffOplog cannot be applied to a query " "which does not imply a minimum 'ts' value ", csn->assertTsHasNotFallenOffOplog); } if (params.allowRIDRange && !csn->resumeAfterRecordId) { handleRIDRangeScan(csn->filter.get(), csn.get()); } return csn; } std::unique_ptr QueryPlannerAccess::makeLeafNode( const CanonicalQuery& query, const IndexEntry& index, size_t pos, const MatchExpression* expr, IndexBoundsBuilder::BoundsTightness* tightnessOut) { // We're guaranteed that all GEO_NEARs are first. This slightly violates the "sort index // predicates by their position in the compound index" rule but GEO_NEAR isn't an ixscan. // This saves our bacon when we have {foo: 1, bar: "2dsphere"} and the predicate on bar is a // $near. If we didn't get the GEO_NEAR first we'd create an IndexScanNode and later cast // it to a GeoNear2DSphereNode // // This should gracefully deal with the case where we have a pred over foo but no geo clause // over bar. In that case there is no GEO_NEAR to appear first and it's treated like a // straight ixscan. if (MatchExpression::GEO_NEAR == expr->matchType()) { // We must not keep the expression node around. 
*tightnessOut = IndexBoundsBuilder::EXACT; auto nearExpr = static_cast(expr); BSONElement elt = index.keyPattern.firstElement(); bool indexIs2D = (String == elt.type() && "2d" == elt.String()); if (indexIs2D) { auto ret = std::make_unique(index); ret->nq = &nearExpr->getData(); ret->baseBounds.fields.resize(index.keyPattern.nFields()); ret->addPointMeta = query.metadataDeps()[DocumentMetadataFields::kGeoNearPoint]; ret->addDistMeta = query.metadataDeps()[DocumentMetadataFields::kGeoNearDist]; return ret; } else { auto ret = std::make_unique(index); ret->nq = &nearExpr->getData(); ret->baseBounds.fields.resize(index.keyPattern.nFields()); ret->addPointMeta = query.metadataDeps()[DocumentMetadataFields::kGeoNearPoint]; ret->addDistMeta = query.metadataDeps()[DocumentMetadataFields::kGeoNearDist]; return ret; } } else if (MatchExpression::TEXT == expr->matchType()) { // We must not keep the expression node around. *tightnessOut = IndexBoundsBuilder::EXACT; auto textExpr = static_cast(expr); auto ret = std::make_unique( index, textExpr->getFTSQuery().clone(), query.metadataDeps()[DocumentMetadataFields::kTextScore]); // Count the number of prefix fields before the "text" field. for (auto&& keyPatternElt : ret->index.keyPattern) { // We know that the only key pattern with a type of String is the _fts field // which is immediately after all prefix fields. if (BSONType::String == keyPatternElt.type()) { break; } ++(ret->numPrefixFields); } return ret; } else { // Note that indexKeyPattern.firstElement().fieldName() may not equal expr->path() // because expr might be inside an array operator that provides a path prefix. auto isn = std::make_unique(index); isn->bounds.fields.resize(index.keyPattern.nFields()); isn->addKeyMetadata = query.metadataDeps()[DocumentMetadataFields::kIndexKey]; isn->queryCollator = query.getCollator(); // Get the ixtag->pos-th element of the index key pattern. // TODO: cache this instead/with ixtag->pos? 
BSONObjIterator it(index.keyPattern); BSONElement keyElt = it.next(); for (size_t i = 0; i < pos; ++i) { verify(it.more()); keyElt = it.next(); } verify(!keyElt.eoo()); IndexBoundsBuilder::translate(expr, keyElt, index, &isn->bounds.fields[pos], tightnessOut); return isn; } } bool QueryPlannerAccess::shouldMergeWithLeaf(const MatchExpression* expr, const ScanBuildingState& scanState) { const QuerySolutionNode* node = scanState.currentScan.get(); if (nullptr == node || nullptr == expr) { return false; } if (nullptr == scanState.ixtag) { return false; } if (scanState.currentIndexNumber != scanState.ixtag->index) { return false; } size_t pos = scanState.ixtag->pos; const IndexEntry& index = scanState.indices[scanState.currentIndexNumber]; const MatchExpression::MatchType mergeType = scanState.root->matchType(); const StageType type = node->getType(); const MatchExpression::MatchType exprType = expr->matchType(); // // First handle special solution tree leaf types. In general, normal index bounds // building is not used for special leaf types, and hence we cannot merge leaves. // // This rule is always true for OR, but there are exceptions for AND. // Specifically, we can often merge a predicate with a special leaf type // by adding a filter to the special leaf type. // if (STAGE_TEXT_MATCH == type) { // Currently only one text predicate is allowed, but to be safe, make sure that we // do not try to merge two text predicates. return MatchExpression::AND == mergeType && MatchExpression::TEXT != exprType; } if (STAGE_GEO_NEAR_2D == type || STAGE_GEO_NEAR_2DSPHERE == type) { // Currently only one GEO_NEAR is allowed, but to be safe, make sure that we // do not try to merge two GEO_NEAR predicates. return MatchExpression::AND == mergeType && MatchExpression::GEO_NEAR != exprType; } // // If we're here, then we're done checking for special leaf nodes, and the leaf // must be a regular index scan. 
// invariant(type == STAGE_IXSCAN); const IndexScanNode* scan = static_cast(node); const IndexBounds* boundsToFillOut = &scan->bounds; if (boundsToFillOut->fields[pos].name.empty()) { // Bounds have yet to be assigned for the 'pos' position in the index. The plan enumerator // should have told us that it is safe to compound bounds in this case. invariant(scanState.ixtag->canCombineBounds); return true; } else { // Bounds have already been assigned for the 'pos' position in the index. if (MatchExpression::AND == mergeType) { // The bounds on the 'pos' position in the index would be intersected if we merged these // two leaf expressions. if (!scanState.ixtag->canCombineBounds) { // If the plan enumerator told us that it isn't safe to intersect bounds in this // case, then it must be because we're using a multikey index. invariant(index.multikey); } return scanState.ixtag->canCombineBounds; } else { // The bounds will be unionized. return true; } } } void QueryPlannerAccess::mergeWithLeafNode(MatchExpression* expr, ScanBuildingState* scanState) { QuerySolutionNode* node = scanState->currentScan.get(); invariant(nullptr != node); const MatchExpression::MatchType mergeType = scanState->root->matchType(); size_t pos = scanState->ixtag->pos; const IndexEntry& index = scanState->indices[scanState->currentIndexNumber]; const StageType type = node->getType(); if (STAGE_TEXT_MATCH == type) { auto textNode = static_cast(node); if (pos < textNode->numPrefixFields) { // This predicate is assigned to one of the prefix fields of the text index. Such // predicates must always be equalities and must always be attached to the TEXT node. In // order to ensure this happens, we assign INEXACT_COVERED tightness. scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED; } else { // The predicate is assigned to one of the trailing fields of the text index. 
We // currently don't generate bounds for predicates assigned to trailing fields of a text // index, but rather attempt to attach a covered filter. However, certain predicates can // never be correctly covered (e.g. $exists), so we assign the tightness accordingly. scanState->tightness = IndexBoundsBuilder::canUseCoveredMatching(expr, index) ? IndexBoundsBuilder::INEXACT_COVERED : IndexBoundsBuilder::INEXACT_FETCH; } return; } IndexBounds* boundsToFillOut = nullptr; if (STAGE_GEO_NEAR_2D == type) { invariant(INDEX_2D == index.type); // 2D indexes have a special format - the "2d" field stores a normally-indexed BinData // field, but additional array fields are *not* exploded into multi-keys - they are stored // directly as arrays in the index. Also, no matter what the index expression, the "2d" // field is always first. // // This means that we can only generically accumulate bounds for 2D indexes over the first // "2d" field (pos == 0) - MatchExpressions over other fields in the 2D index may be covered // (can be evaluated using only the 2D index key). The additional fields must not affect // the index scan bounds, since they are not stored in an IndexScan-compatible format. if (pos > 0) { // The predicate is over a trailing field of the "2d" index. If possible, we assign it // as a covered filter (the INEXACT_COVERED case). Otherwise, the filter must be // evaluated after fetching the full documents. scanState->tightness = IndexBoundsBuilder::canUseCoveredMatching(expr, index) ? 
IndexBoundsBuilder::INEXACT_COVERED : IndexBoundsBuilder::INEXACT_FETCH; return; } // We may have other $geoPredicates on a near index - generate bounds for these GeoNear2DNode* gn = static_cast(node); boundsToFillOut = &gn->baseBounds; } else if (STAGE_GEO_NEAR_2DSPHERE == type) { GeoNear2DSphereNode* gn = static_cast(node); boundsToFillOut = &gn->baseBounds; } else { verify(type == STAGE_IXSCAN); IndexScanNode* scan = static_cast(node); // See STAGE_GEO_NEAR_2D above - 2D indexes can only accumulate scan bounds over the first // "2d" field (pos == 0). if (INDEX_2D == index.type && pos > 0) { // The predicate is over a trailing field of the "2d" index. If possible, we assign it // as a covered filter (the INEXACT_COVERED case). Otherwise, the filter must be // evaluated after fetching the full documents. scanState->tightness = IndexBoundsBuilder::canUseCoveredMatching(expr, index) ? IndexBoundsBuilder::INEXACT_COVERED : IndexBoundsBuilder::INEXACT_FETCH; return; } boundsToFillOut = &scan->bounds; } // Get the ixtag->pos-th element of the index key pattern. // TODO: cache this instead/with ixtag->pos? 
BSONObjIterator it(index.keyPattern); BSONElement keyElt = it.next(); for (size_t i = 0; i < pos; ++i) { verify(it.more()); keyElt = it.next(); } verify(!keyElt.eoo()); scanState->tightness = IndexBoundsBuilder::INEXACT_FETCH; verify(boundsToFillOut->fields.size() > pos); OrderedIntervalList* oil = &boundsToFillOut->fields[pos]; if (boundsToFillOut->fields[pos].name.empty()) { IndexBoundsBuilder::translate(expr, keyElt, index, oil, &scanState->tightness); } else { if (MatchExpression::AND == mergeType) { IndexBoundsBuilder::translateAndIntersect( expr, keyElt, index, oil, &scanState->tightness); } else { verify(MatchExpression::OR == mergeType); IndexBoundsBuilder::translateAndUnion(expr, keyElt, index, oil, &scanState->tightness); } } } void buildTextSubPlan(TextMatchNode* tn) { tassert(5432205, "text match node is null", tn); tassert(5432206, "text match node already has children", tn->children.empty()); tassert(5432207, "text search query is not provided", tn->ftsQuery.get()); auto query = dynamic_cast(tn->ftsQuery.get()); // If we're unable to cast to FTSQueryImpl, then the given query must be an FTSQueryNoop, which // is only used for testing the QueryPlanner and never tries to execute the query, so we don't // need to construct an entire text sub-plan. Moreover, to compute index bounds we need a list // of terms, which can only be obtain from FTSQueryImpl. if (!query) { return; } // If the query requires the "textScore" field or involves multiple search terms, a TEXT_OR or // OR stage is needed. Otherwise, we can use a single index scan directly. const bool needOrStage = tn->wantTextScore || query->getTermsForBounds().size() > 1; tassert(5432208, "failed to obtain text index version", tn->index.infoObj.hasField("textIndexVersion")); const auto textIndexVersion = static_cast(tn->index.infoObj["textIndexVersion"].numberInt()); // Get all the index scans for each term in our query. 
std::vector> indexScanList; indexScanList.reserve(query->getTermsForBounds().size()); for (const auto& term : query->getTermsForBounds()) { auto ixscan = std::make_unique(tn->index); ixscan->bounds.startKey = fts::FTSIndexFormat::getIndexKey( fts::MAX_WEIGHT, term, tn->indexPrefix, textIndexVersion); ixscan->bounds.endKey = fts::FTSIndexFormat::getIndexKey(0, term, tn->indexPrefix, textIndexVersion); ixscan->bounds.boundInclusion = BoundInclusion::kIncludeBothStartAndEndKeys; ixscan->bounds.isSimpleRange = true; ixscan->direction = -1; ixscan->shouldDedup = tn->index.multikey; // If we will be adding a TEXT_OR or OR stage, then it is responsible for applying the // filter. Otherwise, the index scan applies the filter. if (!needOrStage && tn->filter) { ixscan->filter = tn->filter->shallowClone(); } indexScanList.push_back(std::move(ixscan)); } // In case the query didn't have any search term, we can simply use an EOF sub-plan, as no // results can be returned in this case anyway. if (indexScanList.empty()) { indexScanList.push_back(std::make_unique()); } // Build the union of the index scans as a TEXT_OR or an OR stage, depending on whether the // projection requires the "textScore" $meta field. if (tn->wantTextScore) { // We use a TEXT_OR stage to get the union of the results from the index scans and then // compute their text scores. This is a blocking operation. auto textScorer = std::make_unique(); textScorer->filter = std::move(tn->filter); for (auto&& ixscan : indexScanList) { textScorer->children.push_back(ixscan.release()); } tn->children.push_back(textScorer.release()); } else { // Because we don't need the text score, we can use a non-blocking OR stage to get the union // of the index scans or use the index scan directly if there is only one. 
auto textSearcher = [&]() -> std::unique_ptr { if (indexScanList.size() == 1) { tassert(5397400, "If there is only one index scan and we do not need textScore, needOrStage " "should be false", !needOrStage); return std::move(indexScanList[0]); } else { auto orTextSearcher = std::make_unique(); orTextSearcher->filter = std::move(tn->filter); for (auto&& ixscan : indexScanList) { orTextSearcher->children.push_back(ixscan.release()); } return std::move(orTextSearcher); } }(); // Unlike the TEXT_OR stage, the OR stage does not fetch the documents that it outputs. We // add our own FETCH stage to satisfy the requirement of the TEXT_MATCH stage that its // WorkingSetMember inputs have fetched data. auto fetchNode = std::make_unique(); fetchNode->children.push_back(textSearcher.release()); tn->children.push_back(fetchNode.release()); } } void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntry& index) { auto tn = static_cast(node); // If there's no prefix, the filter is already on the node and the index prefix is null. // We can just return. if (!tn->numPrefixFields) { buildTextSubPlan(tn); return; } // We can't create a text stage if there aren't EQ predicates on its prefix terms. So // if we've made it this far, we should have collected the prefix predicates in the // filter. invariant(nullptr != tn->filter.get()); MatchExpression* textFilterMe = tn->filter.get(); BSONObjBuilder prefixBob; if (MatchExpression::AND != textFilterMe->matchType()) { // Only one prefix term. invariant(1u == tn->numPrefixFields); // Sanity check: must be an EQ. invariant(MatchExpression::EQ == textFilterMe->matchType()); EqualityMatchExpression* eqExpr = static_cast(textFilterMe); prefixBob.append(eqExpr->getData()); tn->filter.reset(); } else { invariant(MatchExpression::AND == textFilterMe->matchType()); // Indexed by the keyPattern position index assignment. We want to add // prefixes in order but we must order them first. 
vector> prefixExprs(tn->numPrefixFields); AndMatchExpression* amExpr = static_cast(textFilterMe); invariant(amExpr->numChildren() >= tn->numPrefixFields); // Look through the AND children. The prefix children we want to // stash in prefixExprs. size_t curChild = 0; while (curChild < amExpr->numChildren()) { IndexTag* ixtag = static_cast(amExpr->getChild(curChild)->getTag()); invariant(nullptr != ixtag); // Skip this child if it's not part of a prefix, or if we've already assigned a // predicate to this prefix position. if (ixtag->pos >= tn->numPrefixFields || prefixExprs[ixtag->pos] != nullptr) { ++curChild; continue; } prefixExprs[ixtag->pos] = std::move((*amExpr->getChildVector())[curChild]); amExpr->getChildVector()->erase(amExpr->getChildVector()->begin() + curChild); // Don't increment curChild. } // Go through the prefix equalities in order and create an index prefix out of them. for (size_t i = 0; i < prefixExprs.size(); ++i) { auto prefixMe = prefixExprs[i].get(); invariant(nullptr != prefixMe); invariant(MatchExpression::EQ == prefixMe->matchType()); EqualityMatchExpression* eqExpr = static_cast(prefixMe); prefixBob.append(eqExpr->getData()); } // Clear out an empty $and. if (0 == amExpr->numChildren()) { tn->filter.reset(); } else if (1 == amExpr->numChildren()) { // Clear out unsightly only child of $and. auto child = std::move((*amExpr->getChildVector())[0]); amExpr->getChildVector()->clear(); // Deletes current filter which is amExpr. 
tn->filter = std::move(child); } } tn->indexPrefix = prefixBob.obj(); buildTextSubPlan(tn); } bool QueryPlannerAccess::orNeedsFetch(const ScanBuildingState* scanState) { if (scanState->loosestBounds == IndexBoundsBuilder::EXACT) { return false; } else if (scanState->loosestBounds == IndexBoundsBuilder::INEXACT_FETCH) { return true; } else { invariant(scanState->loosestBounds == IndexBoundsBuilder::INEXACT_COVERED); const IndexEntry& index = scanState->indices[scanState->currentIndexNumber]; return index.multikey; } } void QueryPlannerAccess::finishAndOutputLeaf(ScanBuildingState* scanState, vector>* out) { finishLeafNode(scanState->currentScan.get(), scanState->indices[scanState->currentIndexNumber]); if (MatchExpression::OR == scanState->root->matchType()) { if (orNeedsFetch(scanState)) { // In order to correctly evaluate the predicates for this index, we have to // fetch the full documents. Add a fetch node above the index scan whose filter // includes *all* of the predicates used to generate the ixscan. auto fetch = std::make_unique(); // Takes ownership. fetch->filter = std::move(scanState->curOr); // Takes ownership. fetch->children.push_back(scanState->currentScan.release()); scanState->currentScan = std::move(fetch); } else if (scanState->loosestBounds == IndexBoundsBuilder::INEXACT_COVERED) { // This an OR, at least one of the predicates used to generate 'currentScan' // is inexact covered, but none is inexact fetch. This means that we can put // these predicates, joined by an $or, as filters on the index scan. This avoids // a fetch and allows the predicates to be covered by the index. // // Ex. // Say we have index {a: 1} and query {$or: [{a: /foo/}, {a: /bar/}]}. // The entire query, {$or: [{a: /foo/}, {a: /bar/}]}, should be a filter // in the index scan stage itself. 
scanState->currentScan->filter = std::move(scanState->curOr); } } out->push_back(std::move(scanState->currentScan)); } void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntry& index) { const StageType type = node->getType(); if (STAGE_TEXT_MATCH == type) { return finishTextNode(node, index); } IndexEntry* nodeIndex = nullptr; IndexBounds* bounds = nullptr; if (STAGE_GEO_NEAR_2D == type) { GeoNear2DNode* gnode = static_cast(node); bounds = &gnode->baseBounds; nodeIndex = &gnode->index; } else if (STAGE_GEO_NEAR_2DSPHERE == type) { GeoNear2DSphereNode* gnode = static_cast(node); bounds = &gnode->baseBounds; nodeIndex = &gnode->index; } else { verify(type == STAGE_IXSCAN); IndexScanNode* scan = static_cast(node); nodeIndex = &scan->index; bounds = &scan->bounds; // If this is a $** index, update and populate the keyPattern, bounds, and multikeyPaths. if (index.type == IndexType::INDEX_WILDCARD) { wcp::finalizeWildcardIndexScanConfiguration(scan); } } // Find the first field in the scan's bounds that was not filled out. // TODO: could cache this. size_t firstEmptyField = 0; for (firstEmptyField = 0; firstEmptyField < bounds->fields.size(); ++firstEmptyField) { if (bounds->fields[firstEmptyField].name.empty()) { verify(bounds->fields[firstEmptyField].intervals.empty()); break; } } // All fields are filled out with bounds, nothing to do. if (firstEmptyField == bounds->fields.size()) { return IndexBoundsBuilder::alignBounds(bounds, nodeIndex->keyPattern); } // Skip ahead to the firstEmptyField-th element, where we begin filling in bounds. BSONObjIterator it(nodeIndex->keyPattern); for (size_t i = 0; i < firstEmptyField; ++i) { verify(it.more()); it.next(); } // For each field in the key... while (it.more()) { BSONElement kpElt = it.next(); // There may be filled-in fields to the right of the firstEmptyField; for instance, the // index {loc:"2dsphere", x:1} with a predicate over x and a near search over loc. 
if (bounds->fields[firstEmptyField].name.empty()) { verify(bounds->fields[firstEmptyField].intervals.empty()); IndexBoundsBuilder::allValuesForField(kpElt, &bounds->fields[firstEmptyField]); } ++firstEmptyField; } // Make sure that the length of the key is the length of the bounds we started. verify(firstEmptyField == bounds->fields.size()); // We create bounds assuming a forward direction but can easily reverse bounds to align // according to our desired direction. IndexBoundsBuilder::alignBounds(bounds, nodeIndex->keyPattern); } void QueryPlannerAccess::findElemMatchChildren(const MatchExpression* node, vector* out, vector* subnodesOut) { for (size_t i = 0; i < node->numChildren(); ++i) { MatchExpression* child = node->getChild(i); if (Indexability::isBoundsGenerating(child) && nullptr != child->getTag()) { out->push_back(child); } else if (MatchExpression::AND == child->matchType() || Indexability::arrayUsesIndexOnChildren(child)) { findElemMatchChildren(child, out, subnodesOut); } else if (nullptr != child->getTag()) { subnodesOut->push_back(child); } } } std::vector> QueryPlannerAccess::collapseEquivalentScans( std::vector> scans) { invariant(scans.size() > 0); // Scans that need to be collapsed will be adjacent to each other in the list due to how we // sort the query predicate. We step through the list, either merging the current scan into // the last scan in 'collapsedScans', or adding a new entry to 'collapsedScans' if it can't // be merged. std::vector> collapsedScans; collapsedScans.push_back(std::move(scans[0])); for (size_t i = 1; i < scans.size(); ++i) { if (scansAreEquivalent(collapsedScans.back().get(), scans[i].get())) { // We collapse the entry from 'ownedScans' into the back of 'collapsedScans'. 
std::unique_ptr collapseFrom = std::move(scans[i]); FetchNode* collapseFromFetch = getFetchNode(collapseFrom.get()); FetchNode* collapseIntoFetch = getFetchNode(collapsedScans.back().get()); // If there's no filter associated with a fetch node on 'collapseFrom', all we have to // do is clear the filter on the node that we are collapsing into. if (!collapseFromFetch || !collapseFromFetch->filter.get()) { if (collapseIntoFetch) { collapseIntoFetch->filter.reset(); } continue; } // If there's no filter associated with a fetch node on the back of the 'collapsedScans' // list, then there's nothing more to do. if (!collapseIntoFetch || !collapseIntoFetch->filter.get()) { continue; } // Both the 'from' and 'into' nodes have filters. We join them with an // OrMatchExpression. std::unique_ptr collapsedFilter = std::make_unique(); collapsedFilter->add(std::move(collapseFromFetch->filter)); collapsedFilter->add(std::move(collapseIntoFetch->filter)); // Normalize the filter and add it to 'into'. collapseIntoFetch->filter = MatchExpression::optimize(std::move(collapsedFilter)); } else { // Scans are not equivalent and can't be collapsed. collapsedScans.push_back(std::move(scans[i])); } } invariant(collapsedScans.size() > 0); return collapsedScans; } /** * Returns true if this is a null query that can retrieve all the information it needs directly from * the index, and so does not need a FETCH stage on top of it. Returns false otherwise. */ bool isCoveredNullQuery(const CanonicalQuery& query, MatchExpression* root, IndexTag* tag, const vector& indices, const QueryPlannerParams& params) { // We are only interested in queries checking for an indexed field equalling null. // This optimization can only be done when the index is not multikey, otherwise empty arrays // in the collection will be treated as null/undefined by the index. Additionally, // sparse indexes and hashed indexes should not use this optimization as they will require a // FETCH stage with a filter. 
if (indices[tag->index].multikey || indices[tag->index].sparse || indices[tag->index].type == IndexType::INDEX_HASHED || !ComparisonMatchExpressionBase::isEquality(root->matchType())) { return false; } // Check if the query is looking for null values. const auto node = static_cast(root); if (node->getData().type() != BSONType::jstNULL) { return false; } // If nothing is being projected, the query is fully covered without a fetch. // This is trivially true for a count query. if (params.options & QueryPlannerParams::Options::IS_COUNT) { return true; } // This optimization can only be used for find when the index covers the projection completely. // However, if the indexed field is in the projection, the index may return an incorrect value // for the field, since it does not distinguish between null and undefined. Hence, only find // queries projecting _id are covered. auto proj = query.getProj(); if (!proj) { return false; } // We can cover projections on _id and generated fields and expressions depending only on _id. // However, if the projection is an exclusion, requires match details, requires the full // document, or requires metadata, we will still need a FETCH stage. if (proj->type() == projection_ast::ProjectType::kInclusion && !proj->requiresMatchDetails() && proj->metadataDeps().none() && !proj->requiresDocument()) { auto projFields = proj->getRequiredFields(); // Note that it is not possible to project onto dotted paths of _id here, since they may be // null or missing, and the index cannot differentiate between the two cases, so we would // still need a FETCH stage. return projFields.size() == 1 && projFields[0] == "_id"; } return false; } bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query, MatchExpression* root, bool inArrayOperator, const std::vector& indices, const QueryPlannerParams& params, std::vector>* out) { // Initialize the ScanBuildingState. 
ScanBuildingState scanState(root, indices, inArrayOperator); while (scanState.curChild < root->numChildren()) { MatchExpression* child = root->getChild(scanState.curChild); // If there is no tag, it's not using an index. We've sorted our children such that the // children with tags are first, so we stop now. if (nullptr == child->getTag()) { break; } scanState.ixtag = static_cast(child->getTag()); // If there's a tag it must be valid. verify(IndexTag::kNoIndex != scanState.ixtag->index); // If the child can't use an index on its own field (and the child is not a negation // of a bounds-generating expression), then it's indexed by virtue of one of // its children having an index. // // NOTE: If the child is logical, it could possibly collapse into a single ixscan. we // ignore this for now. if (!Indexability::isBoundsGenerating(child)) { // If we're here, then the child is indexed by virtue of its children. // In most cases this means that we recursively build indexed data // access on 'child'. if (!processIndexScansSubnode(query, &scanState, params, out)) { return false; } continue; } // If we're here, we now know that 'child' can use an index directly and the index is // over the child's field. // We need to track if this is a covered null query so that we can have this information // at hand when handling the filter on an indexed AND. scanState.isCoveredNullQuery = isCoveredNullQuery(query, child, scanState.ixtag, indices, params); // If 'child' is a NOT, then the tag we're interested in is on the NOT's // child node. if (MatchExpression::NOT == child->matchType()) { scanState.ixtag = static_cast(child->getChild(0)->getTag()); invariant(IndexTag::kNoIndex != scanState.ixtag->index); } // If the child we're looking at uses a different index than the current index scan, add // the current index scan to the output as we're done with it. The index scan created // by the child then becomes our new current index scan. 
Note that the current scan // could be NULL, in which case we don't output it. The rest of the logic is identical. // // If the child uses the same index as the current index scan, we may be able to merge // the bounds for the two scans. // // Guiding principle: must the values we're testing come from the same array in the // document? If so, we can combine bounds (via intersection or compounding). If not, // we can't. // // If the index is NOT multikey, it's always semantically correct to combine bounds, // as there are no arrays to worry about. // // If the index is multikey, there are arrays of values. There are several // complications in the multikey case that have to be obeyed both by the enumerator // and here as we try to merge predicates into query solution leaves. The hairy // details of these rules are documented near the top of planner_access.h. if (shouldMergeWithLeaf(child, scanState)) { // The child uses the same index we're currently building a scan for. Merge // the bounds and filters. verify(scanState.currentIndexNumber == scanState.ixtag->index); scanState.tightness = IndexBoundsBuilder::INEXACT_FETCH; mergeWithLeafNode(child, &scanState); handleFilter(&scanState); } else { if (nullptr != scanState.currentScan.get()) { // Output the current scan before starting to construct a new out. finishAndOutputLeaf(&scanState, out); } else { verify(IndexTag::kNoIndex == scanState.currentIndexNumber); } // Reset state before producing a new leaf. scanState.resetForNextScan(scanState.ixtag); scanState.currentScan = makeLeafNode(query, indices[scanState.currentIndexNumber], scanState.ixtag->pos, child, &scanState.tightness); handleFilter(&scanState); } } // Output the scan we're done with, if it exists. 
if (nullptr != scanState.currentScan.get()) { finishAndOutputLeaf(&scanState, out); } return true; } bool QueryPlannerAccess::processIndexScansElemMatch( const CanonicalQuery& query, ScanBuildingState* scanState, const QueryPlannerParams& params, std::vector>* out) { MatchExpression* root = scanState->root; MatchExpression* child = root->getChild(scanState->curChild); const vector& indices = scanState->indices; // We have an AND with an ELEM_MATCH_OBJECT child. The plan enumerator produces // index taggings which indicate that we should try to compound with // predicates retrieved from inside the subtree rooted at the ELEM_MATCH. // In order to obey the enumerator's tagging, we need to retrieve these // predicates from inside the $elemMatch, and try to merge them with // the current index scan. // Contains tagged predicates from inside the tree rooted at 'child' // which are logically part of the AND. vector emChildren; // Contains tagged nodes that are not logically part of the AND and // cannot use the index directly (e.g. OR nodes which are tagged to // be indexed). vector emSubnodes; // Populate 'emChildren' and 'emSubnodes'. findElemMatchChildren(child, &emChildren, &emSubnodes); // Recursively build data access for the nodes inside 'emSubnodes'. for (size_t i = 0; i < emSubnodes.size(); ++i) { MatchExpression* subnode = emSubnodes[i]; if (!Indexability::isBoundsGenerating(subnode)) { // 'subnode' is beneath an $elemMatch. When planning the children of array operators, we // keep ownership of the match expression node. Therefore, we pass nullptr for the // 'ownedRoot' argument. auto childSolution = _buildIndexedDataAccess(query, subnode, nullptr, indices, params); // _buildIndexedDataAccess(...) returns NULL in error conditions, when it is unable to // construct a query solution from a tagged match expression tree. 
If we are unable to // construct a solution according to the instructions from the enumerator, then we bail // out early (by returning false) rather than continuing on and potentially constructing // an invalid solution tree. if (!childSolution) { return false; } // Output the resulting solution tree. out->push_back(std::move(childSolution)); } } // For each predicate in 'emChildren', try to merge it with the current index scan. // // This loop is similar to that in processIndexScans(...), except it does not call into // handleFilters(...). Instead, we leave the entire $elemMatch filter intact. This way, // the complete $elemMatch expression will be affixed as a filter later on. for (size_t i = 0; i < emChildren.size(); ++i) { MatchExpression* emChild = emChildren[i]; invariant(nullptr != emChild->getTag()); scanState->ixtag = static_cast(emChild->getTag()); // If 'emChild' is a NOT, then the tag we're interested in is on the NOT's // child node. if (MatchExpression::NOT == emChild->matchType()) { invariant(nullptr != emChild->getChild(0)->getTag()); scanState->ixtag = static_cast(emChild->getChild(0)->getTag()); invariant(IndexTag::kNoIndex != scanState->ixtag->index); } if (shouldMergeWithLeaf(emChild, *scanState)) { // The child uses the same index we're currently building a scan for. Merge // the bounds and filters. verify(scanState->currentIndexNumber == scanState->ixtag->index); scanState->tightness = IndexBoundsBuilder::INEXACT_FETCH; mergeWithLeafNode(emChild, scanState); } else { if (nullptr != scanState->currentScan.get()) { finishAndOutputLeaf(scanState, out); } else { verify(IndexTag::kNoIndex == scanState->currentIndexNumber); } scanState->currentIndexNumber = scanState->ixtag->index; scanState->tightness = IndexBoundsBuilder::INEXACT_FETCH; scanState->currentScan = makeLeafNode(query, indices[scanState->currentIndexNumber], scanState->ixtag->pos, emChild, &scanState->tightness); } } // We're done processing the $elemMatch child. 
We leave it hanging off // it's AND parent so that it will be affixed as a filter later on, // and move on to the next child of the AND. ++scanState->curChild; return true; } bool QueryPlannerAccess::processIndexScansSubnode( const CanonicalQuery& query, ScanBuildingState* scanState, const QueryPlannerParams& params, std::vector>* out) { MatchExpression* root = scanState->root; MatchExpression* child = root->getChild(scanState->curChild); const vector& indices = scanState->indices; bool inArrayOperator = scanState->inArrayOperator; // We may detach the current child from the tree and assume ownership. std::unique_ptr ownedChild; if (MatchExpression::AND == root->matchType() && MatchExpression::ELEM_MATCH_OBJECT == child->matchType()) { return processIndexScansElemMatch(query, scanState, params, out); } else if (!inArrayOperator) { // The logical sub-tree is responsible for fully evaluating itself. Any required filters or // fetches are already hung on it. As such, we remove the filter branch from our tree and // assume ownership of it. ownedChild = std::move((*root->getChildVector())[scanState->curChild]); root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild); } else { ++scanState->curChild; } // If inArrayOperator: takes ownership of child, which is OK, since we detached // child from root. auto childSolution = _buildIndexedDataAccess(query, child, std::move(ownedChild), indices, params); if (!childSolution) { return false; } out->push_back(std::move(childSolution)); return true; } std::unique_ptr QueryPlannerAccess::buildIndexedAnd( const CanonicalQuery& query, MatchExpression* root, std::unique_ptr ownedRoot, const vector& indices, const QueryPlannerParams& params) { // Clone the match expression before passing it to processIndexScans(), as it may trim // predicates. If we end up with an index intersection solution, then we use our copy of the // match expression to be sure that the FETCH stage will recheck the entire predicate. 
It is not // correct to trim predicates for index intersection plans, as this can lead to spurious matches // (see SERVER-16750). auto clonedRoot = root->shallowClone(); std::vector> ixscanNodes; const bool inArrayOperator = !ownedRoot; if (!processIndexScans(query, root, inArrayOperator, indices, params, &ixscanNodes)) { return nullptr; } // // Process all non-indexed predicates. We hang these above the AND with a fetch and // filter. // // This is the node we're about to return. std::unique_ptr andResult; // We must use an index for at least one child of the AND. We shouldn't be here if this // isn't the case. verify(ixscanNodes.size() >= 1); // Short-circuit: an AND of one child is just the child. if (ixscanNodes.size() == 1) { andResult = std::move(ixscanNodes[0]); } else { // $** indexes are prohibited from participating in either AND_SORTED or AND_HASH. const bool wildcardIndexInvolvedInIntersection = std::any_of(ixscanNodes.begin(), ixscanNodes.end(), [](const auto& ixScan) { return ixScan->getType() == StageType::STAGE_IXSCAN && static_cast(ixScan.get())->index.type == INDEX_WILDCARD; }); if (wildcardIndexInvolvedInIntersection) { return nullptr; } // Figure out if we want AndHashNode or AndSortedNode. bool allSortedByDiskLoc = true; for (size_t i = 0; i < ixscanNodes.size(); ++i) { if (!ixscanNodes[i]->sortedByDiskLoc()) { allSortedByDiskLoc = false; break; } } if (allSortedByDiskLoc) { auto asn = std::make_unique(); asn->addChildren(std::move(ixscanNodes)); andResult = std::move(asn); } else if (internalQueryPlannerEnableHashIntersection.load()) { { auto ahn = std::make_unique(); ahn->addChildren(std::move(ixscanNodes)); andResult = std::move(ahn); } // The AndHashNode provides the sort order of its last child. If any of the // possible subnodes of AndHashNode provides the sort order we care about, we put // that one last. 
for (size_t i = 0; i < andResult->children.size(); ++i) { andResult->children[i]->computeProperties(); if (andResult->children[i]->providedSorts().contains( query.getFindCommandRequest().getSort())) { std::swap(andResult->children[i], andResult->children.back()); break; } } } else { // We can't use sort-based intersection, and hash-based intersection is disabled. // Clean up the index scans and bail out by returning NULL. LOGV2_DEBUG(20947, 5, "Can't build index intersection solution: AND_SORTED is not possible and " "AND_HASH is disabled"); return nullptr; } } // Don't bother doing any kind of fetch analysis lite if we're doing it anyway above us. if (inArrayOperator) { return andResult; } if (andResult->getType() == STAGE_AND_HASH || andResult->getType() == STAGE_AND_SORTED) { // We got an index intersection solution, so we aren't allowed to answer predicates exactly // using the index. This is because the index intersection stage finds documents that match // each index's predicate, but the document isn't guaranteed to be in a state where it // matches all indexed predicates simultaneously. Therefore, it is necessary to add a fetch // stage which will explicitly evaluate the entire predicate (see SERVER-16750). invariant(clonedRoot); auto fetch = std::make_unique(); fetch->filter = std::move(clonedRoot); // Takes ownership of 'andResult'. fetch->children.push_back(andResult.release()); return fetch; } // If there are any nodes still attached to the AND, we can't answer them using the // index, so we put a fetch with filter. if (root->numChildren() > 0) { auto fetch = std::make_unique(); verify(ownedRoot); if (ownedRoot->numChildren() == 1) { // An $and of one thing is that thing. fetch->filter = std::move((*ownedRoot->getChildVector())[0]); ownedRoot->getChildVector()->clear(); // 'autoRoot' will delete the empty $and. } else { // root->numChildren() > 1 // Takes ownership. 
fetch->filter = std::move(ownedRoot); } // takes ownership fetch->children.push_back(andResult.release()); andResult = std::move(fetch); } else { // root has no children, let autoRoot get rid of it when it goes out of scope. } return andResult; } std::unique_ptr QueryPlannerAccess::buildIndexedOr( const CanonicalQuery& query, MatchExpression* root, std::unique_ptr ownedRoot, const vector& indices, const QueryPlannerParams& params) { const bool inArrayOperator = !ownedRoot; std::vector> ixscanNodes; if (!processIndexScans(query, root, inArrayOperator, indices, params, &ixscanNodes)) { return nullptr; } // Unlike an AND, an OR cannot have filters hanging off of it. We stop processing // when any of our children lack index tags. If a node lacks an index tag it cannot // be answered via an index. if (!inArrayOperator && 0 != root->numChildren()) { LOGV2_WARNING(20948, "Planner OR error, non-indexed child of OR"); // We won't enumerate an OR without indices for each child, so this isn't an issue, even // if we have an AND with an OR child -- we won't get here unless the OR is fully // indexed. return nullptr; } // If all index scans are identical, then we collapse them into a single scan. This prevents // us from creating OR plans where the branches of the OR perform duplicate work. ixscanNodes = collapseEquivalentScans(std::move(ixscanNodes)); std::unique_ptr orResult; // An OR of one node is just that node. if (1 == ixscanNodes.size()) { orResult = std::move(ixscanNodes[0]); } else { std::vector shouldReverseScan; if (query.getSortPattern()) { // If all ixscanNodes can provide the sort, shouldReverseScan is populated with which // scans to reverse. shouldReverseScan = canProvideSortWithMergeSort(ixscanNodes, query.getFindCommandRequest().getSort()); } if (!shouldReverseScan.empty()) { // Each node can provide either the requested sort, or the reverse of the requested // sort. 
invariant(ixscanNodes.size() == shouldReverseScan.size()); for (size_t i = 0; i < ixscanNodes.size(); ++i) { if (shouldReverseScan[i]) { QueryPlannerCommon::reverseScans(ixscanNodes[i].get()); } } auto msn = std::make_unique(); msn->sort = query.getFindCommandRequest().getSort(); msn->addChildren(std::move(ixscanNodes)); orResult = std::move(msn); } else { auto orn = std::make_unique(); orn->addChildren(std::move(ixscanNodes)); orResult = std::move(orn); } } // Evaluate text nodes first to ensure that text scores are available. // Move text nodes to front of vector. std::stable_partition(orResult->children.begin(), orResult->children.end(), isTextNode); // OR must have an index for each child, so we should have detached all children from // 'root', and there's nothing useful to do with an empty or MatchExpression. We let it die // via autoRoot. return orResult; } std::unique_ptr QueryPlannerAccess::buildIndexedDataAccess( const CanonicalQuery& query, std::unique_ptr root, const vector& indices, const QueryPlannerParams& params) { MatchExpression* unownedRoot = root.get(); return _buildIndexedDataAccess(query, unownedRoot, std::move(root), indices, params); } std::unique_ptr QueryPlannerAccess::_buildIndexedDataAccess( const CanonicalQuery& query, MatchExpression* root, std::unique_ptr ownedRoot, const vector& indices, const QueryPlannerParams& params) { if (root->getCategory() == MatchExpression::MatchCategory::kLogical && !Indexability::isBoundsGeneratingNot(root)) { if (MatchExpression::AND == root->matchType()) { // Takes ownership of root. return buildIndexedAnd(query, root, std::move(ownedRoot), indices, params); } else if (MatchExpression::OR == root->matchType()) { // Takes ownership of root. return buildIndexedOr(query, root, std::move(ownedRoot), indices, params); } else { return nullptr; } } else { if (!root->getTag()) { // No index to use here, not in the context of logical operator, so we're SOL. 
return nullptr; } else if (Indexability::isBoundsGenerating(root)) { // Make an index scan over the tagged index #. IndexTag* tag = static_cast(root->getTag()); IndexBoundsBuilder::BoundsTightness tightness = IndexBoundsBuilder::EXACT; auto soln = makeLeafNode(query, indices[tag->index], tag->pos, root, &tightness); verify(nullptr != soln); finishLeafNode(soln.get(), indices[tag->index]); if (!ownedRoot) { // We're performing access planning for the child of an array operator such as // $elemMatch value. return soln; } // If the bounds are exact, the set of documents that satisfy the predicate is // exactly equal to the set of documents that the scan provides. // // If the bounds are not exact, the set of documents returned from the scan is a // superset of documents that satisfy the predicate, and we must check the // predicate. // We may also be able to avoid adding an extra fetch stage even though the bounds are // inexact because the query is counting null values on an indexed field without // projecting that field. if (tightness == IndexBoundsBuilder::EXACT || isCoveredNullQuery(query, root, tag, indices, params)) { return soln; } else if (tightness == IndexBoundsBuilder::INEXACT_COVERED && !indices[tag->index].multikey) { verify(nullptr == soln->filter.get()); soln->filter = std::move(ownedRoot); return soln; } else { auto fetch = std::make_unique(); fetch->filter = std::move(ownedRoot); fetch->children.push_back(soln.release()); return fetch; } } else if (Indexability::arrayUsesIndexOnChildren(root)) { std::unique_ptr solution; invariant(root->matchType() == MatchExpression::ELEM_MATCH_OBJECT); // The child is an AND. invariant(1 == root->numChildren()); // Recursively build a data access plan for the child of the $elemMatch object. We // maintain ownership of 'ownedRoot'. solution = _buildIndexedDataAccess(query, root->getChild(0), nullptr, indices, params); if (!solution) { return nullptr; } // There may be an array operator above us. 
if (!ownedRoot) { return solution; } auto fetch = std::make_unique(); fetch->filter = std::move(ownedRoot); fetch->children.push_back(solution.release()); return fetch; } } return nullptr; } std::unique_ptr QueryPlannerAccess::scanWholeIndex( const IndexEntry& index, const CanonicalQuery& query, const QueryPlannerParams& params, int direction) { std::unique_ptr solnRoot; // Build an ixscan over the id index, use it, and return it. unique_ptr isn = std::make_unique(index); isn->addKeyMetadata = query.metadataDeps()[DocumentMetadataFields::kIndexKey]; isn->queryCollator = query.getCollator(); IndexBoundsBuilder::allValuesBounds(index.keyPattern, &isn->bounds); if (-1 == direction) { QueryPlannerCommon::reverseScans(isn.get()); isn->direction = -1; } unique_ptr filter = query.root()->shallowClone(); // If it's find({}) remove the no-op root. if (MatchExpression::AND == filter->matchType() && (0 == filter->numChildren())) { solnRoot = std::move(isn); } else { // TODO: We may not need to do the fetch if the predicates in root are covered. But // for now it's safe (though *maybe* slower). unique_ptr fetch = std::make_unique(); fetch->filter = std::move(filter); fetch->children.push_back(isn.release()); solnRoot = std::move(fetch); } return solnRoot; } void QueryPlannerAccess::addFilterToSolutionNode(QuerySolutionNode* node, std::unique_ptr match, MatchExpression::MatchType type) { if (nullptr == node->filter) { node->filter = std::move(match); } else if (type == node->filter->matchType()) { // The 'node' already has either an AND or OR filter that matches 'type'. Add 'match' as // another branch of the filter. ListOfMatchExpression* listFilter = static_cast(node->filter.get()); listFilter->add(std::move(match)); } else { // The 'node' already has a filter that does not match 'type'. If 'type' is AND, then // combine 'match' with the existing filter by adding an AND. If 'type' is OR, combine // by adding an OR node. 
unique_ptr listFilter; if (MatchExpression::AND == type) { listFilter = std::make_unique(); } else { verify(MatchExpression::OR == type); listFilter = std::make_unique(); } unique_ptr oldFilter = node->filter->shallowClone(); listFilter->add(std::move(oldFilter)); listFilter->add(std::move(match)); node->filter = std::move(listFilter); } } void QueryPlannerAccess::handleFilter(ScanBuildingState* scanState) { if (MatchExpression::OR == scanState->root->matchType()) { handleFilterOr(scanState); } else if (MatchExpression::AND == scanState->root->matchType()) { handleFilterAnd(scanState); } else { // We must be building leaves for either and AND or an OR. MONGO_UNREACHABLE; } } void QueryPlannerAccess::handleFilterOr(ScanBuildingState* scanState) { MatchExpression* root = scanState->root; if (scanState->inArrayOperator) { // We're inside an array operator. The entire array operator expression // should always be affixed as a filter. We keep 'curChild' in the $and // for affixing later. ++scanState->curChild; } else { if (scanState->tightness < scanState->loosestBounds) { scanState->loosestBounds = scanState->tightness; } // Detach 'child' and add it to 'curOr'. auto child = std::move((*root->getChildVector())[scanState->curChild]); root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild); scanState->curOr->getChildVector()->push_back(std::move(child)); } } void QueryPlannerAccess::handleFilterAnd(ScanBuildingState* scanState) { MatchExpression* root = scanState->root; const IndexEntry& index = scanState->indices[scanState->currentIndexNumber]; if (scanState->inArrayOperator) { // We're inside an array operator. The entire array operator expression // should always be affixed as a filter. We keep 'curChild' in the $and // for affixing later. 
++scanState->curChild; } else if (scanState->tightness == IndexBoundsBuilder::EXACT || scanState->isCoveredNullQuery) { // The tightness of the bounds is exact or we are dealing with a covered null query. // Either way, we want to remove this child so that when control returns to handleIndexedAnd // we know that we don't need it to create a FETCH stage. root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild); } else if (scanState->tightness == IndexBoundsBuilder::INEXACT_COVERED && (INDEX_TEXT == index.type || !index.multikey)) { // The bounds are not exact, but the information needed to // evaluate the predicate is in the index key. Remove the // MatchExpression from its parent and attach it to the filter // of the index scan we're building. // // We can only use this optimization if the index is NOT multikey. // Suppose that we had the multikey index {x: 1} and a document // {x: ["a", "b"]}. Now if we query for {x: /b/} the filter might // ever only be applied to the index key "a". We'd incorrectly // conclude that the document does not match the query :( so we // gotta stick to non-multikey indices. auto child = std::move((*root->getChildVector())[scanState->curChild]); root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild); addFilterToSolutionNode(scanState->currentScan.get(), std::move(child), root->matchType()); } else { // We keep curChild in the AND for affixing later. ++scanState->curChild; } } std::unique_ptr QueryPlannerAccess::makeIndexScan( const IndexEntry& index, const CanonicalQuery& query, const QueryPlannerParams& params, const BSONObj& startKey, const BSONObj& endKey) { std::unique_ptr solnRoot; // Build an ixscan over the id index, use it, and return it. 
auto isn = std::make_unique(index); isn->direction = 1; isn->addKeyMetadata = query.metadataDeps()[DocumentMetadataFields::kIndexKey]; isn->bounds.isSimpleRange = true; isn->bounds.startKey = startKey; isn->bounds.endKey = endKey; isn->bounds.boundInclusion = BoundInclusion::kIncludeStartKeyOnly; isn->queryCollator = query.getCollator(); unique_ptr filter = query.root()->shallowClone(); // If it's find({}) remove the no-op root. if (MatchExpression::AND == filter->matchType() && (0 == filter->numChildren())) { solnRoot = std::move(isn); } else { // TODO: We may not need to do the fetch if the predicates in root are covered. But // for now it's safe (though *maybe* slower). unique_ptr fetch = std::make_unique(); fetch->filter = std::move(filter); fetch->children.push_back(isn.release()); solnRoot = std::move(fetch); } return solnRoot; } } // namespace mongo