diff options
author | Bernard Gorman <bernard.gorman@gmail.com> | 2018-09-12 15:30:56 +0100 |
---|---|---|
committer | Bernard Gorman <bernard.gorman@gmail.com> | 2018-10-11 01:23:07 +0100 |
commit | 8dab2485c1badc2fd09b84667b4dcfc8b53fef63 (patch) | |
tree | 3479da286c74a91a75b1703f422aa886e59bd33d /src/mongo | |
parent | e01cdb673978f6d3a9d53bb3a6ba35b34c281b0e (diff) | |
download | mongo-8dab2485c1badc2fd09b84667b4dcfc8b53fef63.tar.gz |
SERVER-36517 Allow wildcard indexes to provide DISTINCT_SCAN
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/bson/ordering.h | 4 | ||||
-rw-r--r-- | src/mongo/db/exec/distinct_scan.cpp | 38 | ||||
-rw-r--r-- | src/mongo/db/exec/distinct_scan.h | 62 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 43 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/query/planner_access.cpp | 77 | ||||
-rw-r--r-- | src/mongo/db/query/planner_access.h | 2 | ||||
-rw-r--r-- | src/mongo/db/query/planner_ixselect.cpp | 74 | ||||
-rw-r--r-- | src/mongo/db/query/planner_wildcard_helpers.cpp | 310 | ||||
-rw-r--r-- | src/mongo/db/query/planner_wildcard_helpers.h | 57 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_wildcard_index_test.cpp | 52 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.h | 6 | ||||
-rw-r--r-- | src/mongo/db/query/stage_builder.cpp | 19 | ||||
-rw-r--r-- | src/mongo/dbtests/query_stage_distinct.cpp | 26 |
15 files changed, 465 insertions, 318 deletions
diff --git a/src/mongo/bson/ordering.h b/src/mongo/bson/ordering.h index d6e26e0aadc..b407709f6ca 100644 --- a/src/mongo/bson/ordering.h +++ b/src/mongo/bson/ordering.h @@ -45,6 +45,8 @@ class Ordering { Ordering(unsigned b) : bits(b) {} public: + static constexpr size_t kMaxCompoundIndexKeys = size_t{32}; + Ordering(const Ordering& r) : bits(r.bits) {} void operator=(const Ordering& r) { bits = r.bits; @@ -78,7 +80,7 @@ public: BSONElement e = k.next(); if (e.eoo()) break; - uassert(13103, "too many compound keys", n <= 31); + uassert(13103, "too many compound keys", n < kMaxCompoundIndexKeys); if (e.number() < 0) b |= (1 << n); n++; diff --git a/src/mongo/db/exec/distinct_scan.cpp b/src/mongo/db/exec/distinct_scan.cpp index 5b1d5ccc545..90b91bcc49a 100644 --- a/src/mongo/db/exec/distinct_scan.cpp +++ b/src/mongo/db/exec/distinct_scan.cpp @@ -46,28 +46,22 @@ using stdx::make_unique; // static const char* DistinctScan::kStageType = "DISTINCT_SCAN"; -DistinctScan::DistinctScan(OperationContext* opCtx, - const DistinctParams& params, - WorkingSet* workingSet) +DistinctScan::DistinctScan(OperationContext* opCtx, DistinctParams params, WorkingSet* workingSet) : PlanStage(kStageType, opCtx), + _params(std::move(params)), _workingSet(workingSet), - _descriptor(params.descriptor), - _iam(params.descriptor->getIndexCatalog()->getIndex(params.descriptor)), - _params(params), - _checker(&_params.bounds, _descriptor->keyPattern(), _params.direction) { - _specificStats.keyPattern = _params.descriptor->keyPattern(); - if (BSONElement collationElement = _params.descriptor->getInfoElement("collation")) { - invariant(collationElement.isABSONObj()); - _specificStats.collation = collationElement.Obj().getOwned(); - } - _specificStats.indexName = _params.descriptor->indexName(); - _specificStats.indexVersion = static_cast<int>(_params.descriptor->version()); - _specificStats.isMultiKey = _params.descriptor->isMultikey(getOpCtx()); - _specificStats.multiKeyPaths = _params.descriptor->getMultikeyPaths(getOpCtx()); - _specificStats.isUnique = _params.descriptor->unique(); - _specificStats.isSparse = _params.descriptor->isSparse(); - _specificStats.isPartial = _params.descriptor->isPartial(); - _specificStats.direction = _params.direction; + _iam(_params.accessMethod), + _checker(&_params.bounds, _params.keyPattern, _params.scanDirection) { + _specificStats.keyPattern = _params.keyPattern; + _specificStats.indexName = _params.name; + _specificStats.indexVersion = static_cast<int>(_params.version); + _specificStats.isMultiKey = _params.isMultiKey; + _specificStats.multiKeyPaths = _params.multikeyPaths; + _specificStats.isUnique = _params.isUnique; + _specificStats.isSparse = _params.isSparse; + _specificStats.isPartial = _params.isPartial; + _specificStats.direction = _params.scanDirection; + _specificStats.collation = _params.collation.getOwned(); // Set up our initial seek. If there is no valid data, just mark as EOF. _commonStats.isEOF = !_checker.getStartSeekPoint(&_seekPoint); @@ -80,7 +74,7 @@ PlanStage::StageState DistinctScan::doWork(WorkingSetID* out) { boost::optional<IndexKeyEntry> kv; try { if (!_cursor) - _cursor = _iam->newCursor(getOpCtx(), _params.direction == 1); + _cursor = _iam->newCursor(getOpCtx(), _params.scanDirection == 1); kv = _cursor->seek(_seekPoint); } catch (const WriteConflictException&) { *out = WorkingSet::INVALID_ID; @@ -119,7 +113,7 @@ PlanStage::StageState DistinctScan::doWork(WorkingSetID* out) { WorkingSetID id = _workingSet->allocate(); WorkingSetMember* member = _workingSet->get(id); member->recordId = kv->loc; - member->keyData.push_back(IndexKeyDatum(_descriptor->keyPattern(), kv->key, _iam)); + member->keyData.push_back(IndexKeyDatum(_params.keyPattern, kv->key, _iam)); _workingSet->transitionToRecordIdAndIdx(id); *out = id; diff --git a/src/mongo/db/exec/distinct_scan.h b/src/mongo/db/exec/distinct_scan.h index 44ae137dac6..debec3bb30e 100644 --- a/src/mongo/db/exec/distinct_scan.h +++ b/src/mongo/db/exec/distinct_scan.h @@ -42,16 +42,51 @@ class IndexAccessMethod; class IndexDescriptor; class WorkingSet; -// TODO SERVER-36517: keyPattern, indexName and multikeyPaths info should be provided explicitly -// here and adopted by DistinctScan, rather than being resolved via the IndexDescriptor. struct DistinctParams { - DistinctParams() : descriptor(NULL), direction(1), fieldNo(0) {} + DistinctParams(const IndexDescriptor& descriptor, + std::string indexName, + BSONObj keyPattern, + MultikeyPaths multikeyPaths, + bool multikey) + : accessMethod(descriptor.getIndexCatalog()->getIndex(&descriptor)), + name(std::move(indexName)), + keyPattern(std::move(keyPattern)), + multikeyPaths(std::move(multikeyPaths)), + isMultiKey(multikey), + isSparse(descriptor.isSparse()), + isUnique(descriptor.unique()), + isPartial(descriptor.isPartial()), + version(descriptor.version()), + collation(descriptor.infoObj() + .getObjectField(IndexDescriptor::kCollationFieldName) + .getOwned()) { + invariant(accessMethod); + } + + DistinctParams(OperationContext* opCtx, const IndexDescriptor& descriptor) + : DistinctParams(descriptor, + descriptor.indexName(), + descriptor.keyPattern(), + descriptor.getMultikeyPaths(opCtx), + descriptor.isMultikey(opCtx)) {} + + const IndexAccessMethod* accessMethod; + std::string name; + + BSONObj keyPattern; + + MultikeyPaths multikeyPaths; + bool isMultiKey; + + bool isSparse; + bool isUnique; + bool isPartial; - // What index are we traversing? - const IndexDescriptor* descriptor; + IndexDescriptor::IndexVersion version; - // And in what direction? - int direction; + BSONObj collation; + + int scanDirection{1}; // What are the bounds? IndexBounds bounds; @@ -61,7 +96,7 @@ struct DistinctParams { // If we have an index {a:1, b:1} we could use it to distinct over either 'a' or 'b'. // If we distinct over 'a' the position is 0. // If we distinct over 'b' the position is 1. - int fieldNo; + int fieldNo{0}; }; /** @@ -75,7 +110,7 @@ struct DistinctParams { */ class DistinctScan final : public PlanStage { public: - DistinctScan(OperationContext* opCtx, const DistinctParams& params, WorkingSet* workingSet); + DistinctScan(OperationContext* opCtx, DistinctParams params, WorkingSet* workingSet); StageState doWork(WorkingSetID* out) final; bool isEOF() final; @@ -95,18 +130,17 @@ public: static const char* kStageType; private: + // The parameters used to configure this DistinctScan stage. + DistinctParams _params; + // The WorkingSet we annotate with results. Not owned by us. WorkingSet* _workingSet; - // Index access. - const IndexDescriptor* _descriptor; // owned by Collection -> IndexCatalog - const IndexAccessMethod* _iam; // owned by Collection -> IndexCatalog + const IndexAccessMethod* _iam; // The cursor we use to navigate the tree. std::unique_ptr<SortedDataInterface::Cursor> _cursor; - DistinctParams _params; - // _checker gives us our start key and ensures we stay in bounds. IndexBoundsChecker _checker; IndexSeekPoint _seekPoint; diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 161b73386dc..572d7007b39 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -66,6 +66,7 @@ #include "mongo/db/query/plan_executor.h" #include "mongo/db/query/planner_access.h" #include "mongo/db/query/planner_analysis.h" +#include "mongo/db/query/planner_wildcard_helpers.h" #include "mongo/db/query/query_knobs.h" #include "mongo/db/query/query_planner.h" #include "mongo/db/query/query_planner_common.h" @@ -117,6 +118,7 @@ void filterAllowedIndexEntries(const AllowedIndicesFilter& allowedIndicesFilter, } namespace { +namespace wcp = ::mongo::wildcard_planning; // The body is below in the "count hack" section but getExecutor calls it. bool turnIxscanIntoCount(QuerySolution* soln); } // namespace @@ -1335,6 +1337,22 @@ bool turnIxscanIntoDistinctIxscan(QuerySolution* soln, indexScanNode = static_cast<IndexScanNode*>(fetchNode->children[0]); } + if (indexScanNode->index.type == IndexType::INDEX_WILDCARD) { + // If the query is on a field other than the distinct key, we may have generated a $** plan + // which does not actually contain the distinct key field. + if (field != std::next(indexScanNode->index.keyPattern.begin())->fieldName()) { + return false; + } + // If the query includes object bounds, we cannot turn this IXSCAN into a DISTINCT_SCAN. + // Wildcard indexes contain multiple keys per object, one for each subpath in ascending + // (Path, Value, RecordId) order. If the distinct fields in two successive documents are + // objects with the same leaf path values but in different field order, e.g. {a: 1, b: 2} + // and {b: 2, a: 1}, we would therefore only return the first document and skip the other. + if (wcp::isWildcardObjectSubpathScan(indexScanNode)) { + return false; + } + } + // An additional filter must be applied to the data in the key, so we can't just skip // all the keys with a given value; we must examine every one to find the one that (may) // pass the filter. @@ -1440,16 +1458,24 @@ namespace { QueryPlannerParams fillOutPlannerParamsForDistinct(OperationContext* opCtx, Collection* collection, size_t plannerOptions, - const std::string& distinctKey) { + const ParsedDistinct& parsedDistinct) { QueryPlannerParams plannerParams; plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN | plannerOptions; IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(opCtx, false); + auto query = parsedDistinct.getQuery()->getQueryRequest().getFilter(); while (ii.more()) { const IndexDescriptor* desc = ii.next(); IndexCatalogEntry* ice = ii.catalogEntry(desc); - if (desc->keyPattern().hasField(distinctKey)) { + if (desc->keyPattern().hasField(parsedDistinct.getKey())) { plannerParams.indices.push_back(indexEntryFromIndexCatalogEntry(opCtx, *ice)); + } else if (desc->getIndexType() == IndexType::INDEX_WILDCARD && !query.isEmpty()) { + // Check whether the $** projection captures the field over which we are distinct-ing. + const auto proj = WildcardKeyGenerator::createProjectionExec(desc->keyPattern(), + desc->pathProjection()); + if (proj->applyProjectionToOneField(parsedDistinct.getKey())) { + plannerParams.indices.push_back(indexEntryFromIndexCatalogEntry(opCtx, *ice)); + } } } @@ -1485,9 +1511,8 @@ Status getExecutorForSimpleDistinct(OperationContext* opCtx, invariant(queryOrExecutor->cq); invariant(!queryOrExecutor->executor); - // If there's no query, we can just distinct-scan one of the indices. - // Not every index in plannerParams.indices may be suitable. Refer to - // getDistinctNodeIndex(). + // If there's no query, we can just distinct-scan one of the indices. Not every index in + // plannerParams.indices may be suitable. Refer to getDistinctNodeIndex(). size_t distinctNodeIndex = 0; if (!parsedDistinct->getQuery()->getQueryRequest().getFilter().isEmpty() || !parsedDistinct->getQuery()->getQueryRequest().getSort().isEmpty() || @@ -1632,8 +1657,8 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorDistinct( // We go through normal planning (with limited parameters) to see if we can produce // a soln with the above properties. - auto plannerParams = fillOutPlannerParamsForDistinct( - opCtx, collection, plannerOptions, parsedDistinct->getKey()); + auto plannerParams = + fillOutPlannerParamsForDistinct(opCtx, collection, plannerOptions, *parsedDistinct); const ExtensionsCallbackReal extensionsCallback(opCtx, &collection->ns()); @@ -1657,8 +1682,8 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorDistinct( auto qr = stdx::make_unique<QueryRequest>(parsedDistinct->getQuery()->getQueryRequest()); // Applying a projection allows the planner to try to give us covered plans that we can turn - // into the projection hack. getDistinctProjection deals with .find() projection semantics - // (ie _id:1 being implied by default). + // into the projection hack. The getDistinctProjection() function deals with .find() projection + // semantics (ie _id:1 being implied by default). if (qr->getProj().isEmpty()) { BSONObj projection = getDistinctProjection(parsedDistinct->getKey()); qr->setProj(projection); diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index 0962ded1297..6fcf5e8ceb1 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -57,7 +57,7 @@ namespace mongo { namespace { -namespace app = ::mongo::wildcard_planning; +namespace wcp = ::mongo::wildcard_planning; // Helper for checking that an OIL "appears" to be ascending given one interval. void assertOILIsAscendingLocally(const vector<Interval>& intervals, size_t idx) { @@ -345,7 +345,7 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr, // adjusted regardless of the predicate. Having filled out the initial bounds, we apply any // necessary changes to the tightness here. if (index.type == IndexType::INDEX_WILDCARD) { - *tightnessOut = app::translateWildcardIndexBoundsAndTightness(index, *tightnessOut, oilOut); + *tightnessOut = wcp::translateWildcardIndexBoundsAndTightness(index, *tightnessOut, oilOut); } } diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp index 6cc93126dad..7bcb08f0832 100644 --- a/src/mongo/db/query/planner_access.cpp +++ b/src/mongo/db/query/planner_access.cpp @@ -57,7 +57,7 @@ namespace { using namespace mongo; -namespace app = ::mongo::wildcard_planning; +namespace wcp = ::mongo::wildcard_planning; namespace dps = ::mongo::dotted_path_support; /** @@ -530,79 +530,6 @@ void QueryPlannerAccess::finishTextNode(QuerySolutionNode* node, const IndexEntr tn->indexPrefix = prefixBob.obj(); } -void QueryPlannerAccess::finishWildcardIndexScanNode(QuerySolutionNode* node, - const IndexEntry& plannerIndex) { - // We should only ever reach this point if we are an IndexScanNode for a $** index. - invariant(plannerIndex.type == IndexType::INDEX_WILDCARD); - invariant(node && node->getType() == STAGE_IXSCAN); - - // Sanity check the QuerySolutionNode's copy of the IndexEntry. - IndexScanNode* scan = static_cast<IndexScanNode*>(node); - invariant(scan->index.multikeyPaths.size() == 1); - invariant(scan->index == plannerIndex); - - // Obtain some references to make the remainder of this function more legible. - auto& bounds = scan->bounds; - auto& index = scan->index; - - // For $** indexes, the IndexEntry key pattern is {'path.to.field': ±1} but the actual keys in - // the index are of the form {'$_path': ±1, 'path.to.field': ±1}, where the value of the first - // field in each key is 'path.to.field'. We push a new entry into the bounds vector for the - // leading '$_path' bound here. We also push corresponding fields into the IndexScanNode's - // keyPattern and its multikeyPaths vector. - invariant(plannerIndex.keyPattern.nFields() == 1); - invariant(bounds.fields.size() == 1); - invariant(!bounds.fields.front().name.empty()); - index.multikeyPaths.insert(index.multikeyPaths.begin(), std::set<std::size_t>{}); - bounds.fields.insert(bounds.fields.begin(), {"$_path"}); - index.keyPattern = - BSON("$_path" << index.keyPattern.firstElement() << index.keyPattern.firstElement()); - - // Create a FieldRef to perform any necessary manipulations on the query path string. - FieldRef queryPath{plannerIndex.keyPattern.firstElementFieldName()}; - auto& multikeyPaths = index.multikeyPaths.back(); - - // Helper function to check whether the final path component in 'queryPath' is an array index. - const auto lastFieldIsArrayIndex = [&multikeyPaths](const auto& queryPath) { - return (queryPath.numParts() > 1u && multikeyPaths.count(queryPath.numParts() - 2u) && - queryPath.isNumericPathComponentStrict(queryPath.numParts() - 1u)); - }; - - // If the bounds intersect with any objects, we must add bounds that encompass all its - // subpaths, specifically the interval ["path.","path/") on "$_path". - const bool requiresSubpathBounds = app::requiresSubpathBounds(bounds.fields.back()); - - // For bounds which contain objects, we build a range interval on all subpaths of the query - // path(s). We must therefore trim any trailing array indices from the query path before - // generating the fieldname-or-array power set, in order to avoid overlapping the final set of - // bounds. For instance, the untrimmed query path 'a.0' will produce paths 'a' and 'a.0' if 'a' - // is multikey, and so we would end up with bounds [['a','a'], ['a.','a/'], ['a.0','a.0'], - // ['a.0.','a.0/']]. The latter two are subsets of the ['a.', 'a/'] interval. - while (requiresSubpathBounds && lastFieldIsArrayIndex(queryPath)) { - queryPath.removeLastPart(); - } - - // Account for fieldname-or-array-index semantics. $** indexes do not explicitly encode array - // indices in their keys, so if this query traverses one or more multikey fields via an array - // index (e.g. query 'a.0.b' where 'a' is an array), then we must generate bounds on all array- - // and non-array permutations of the path in order to produce INEXACT_FETCH bounds. - auto paths = app::generateFieldNameOrArrayIndexPathSet(multikeyPaths, queryPath); - - // Add a $_path point-interval for each path that needs to be traversed in the index. If the - // bounds on these paths include objects, then for each applicable path we must add a range - // interval on all its subpaths, i.e. ["path.","path/"). - static const char subPathStart = '.', subPathEnd = static_cast<char>('.' + 1); - auto& pathIntervals = bounds.fields.front().intervals; - for (const auto& fieldPath : paths) { - auto path = fieldPath.dottedField().toString(); - pathIntervals.push_back(IndexBoundsBuilder::makePointInterval(path)); - if (requiresSubpathBounds) { - pathIntervals.push_back(IndexBoundsBuilder::makeRangeInterval( - path + subPathStart, path + subPathEnd, BoundInclusion::kIncludeStartKeyOnly)); - } - } -} - bool QueryPlannerAccess::orNeedsFetch(const ScanBuildingState* scanState) { if (scanState->loosestBounds == IndexBoundsBuilder::EXACT) { return false; @@ -675,7 +602,7 @@ void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntr // If this is a $** index, update and populate the keyPattern, bounds, and multikeyPaths. if (index.type == IndexType::INDEX_WILDCARD) { - finishWildcardIndexScanNode(node, index); + wcp::finalizeWildcardIndexScanConfiguration(nodeIndex, bounds); } // Find the first field in the scan's bounds that was not filled out. diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h index fec5ef6b793..e2097f2ab62 100644 --- a/src/mongo/db/query/planner_access.h +++ b/src/mongo/db/query/planner_access.h @@ -388,8 +388,6 @@ private: static void finishTextNode(QuerySolutionNode* node, const IndexEntry& index); - static void finishWildcardIndexScanNode(QuerySolutionNode* node, const IndexEntry& index); - /** * Add the filter 'match' to the query solution node 'node'. Takes * ownership of 'match'. diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp index 4fa9142c12c..62771cbf8b6 100644 --- a/src/mongo/db/query/planner_ixselect.cpp +++ b/src/mongo/db/query/planner_ixselect.cpp @@ -52,82 +52,12 @@ namespace mongo { namespace { -namespace app = ::mongo::wildcard_planning; +namespace wcp = ::mongo::wildcard_planning; std::size_t numPathComponents(StringData path) { return FieldRef{path}.numParts(); } -/** - * Given a single wildcard index, and a set of fields which are being queried, create 'mock' - * IndexEntry for each of the appropriate fields. - */ -void expandIndex(const IndexEntry& wildcardIndex, - const stdx::unordered_set<std::string>& fields, - vector<IndexEntry>* out) { - invariant(out); - invariant(wildcardIndex.type == INDEX_WILDCARD); - // Should only have one field of the form {"path.$**" : 1}. - invariant(wildcardIndex.keyPattern.nFields() == 1); - - // $** indexes do not keep the multikey metadata inside the index catalog entry, as the amount - // of metadata is not bounded. We do not expect IndexEntry objects for $** indexes to have a - // fixed-size vector of multikey metadata until after they are expanded. - invariant(wildcardIndex.multikeyPaths.empty()); - - const auto projExec = WildcardKeyGenerator::createProjectionExec( - wildcardIndex.keyPattern, wildcardIndex.infoObj.getObjectField("wildcardProjection")); - - const auto projectedFields = projExec->applyProjectionToFields(fields); - - const auto& includedPaths = projExec->getExhaustivePaths(); - - out->reserve(out->size() + projectedFields.size()); - for (auto&& fieldName : projectedFields) { - // Convert string 'fieldName' into a FieldRef, to better facilitate the subsequent checks. - const auto queryPath = FieldRef{fieldName}; - // $** indices hold multikey metadata directly in the index keys, rather than in the index - // catalog. In turn, the index key data is used to produce a set of multikey paths - // in-memory. Here we convert this set of all multikey paths into a MultikeyPaths vector - // which will indicate to the downstream planning code which components of 'fieldName' are - // multikey. - auto multikeyPaths = app::buildMultiKeyPathsForExpandedWildcardIndexEntry( - queryPath, wildcardIndex.multikeyPathSet); - - // Check whether a query on the current fieldpath is answerable by the $** index, given any - // numerical path components that may be present in the path string. - if (!app::validateNumericPathComponents(multikeyPaths, includedPaths, queryPath)) { - continue; - } - - // The expanded IndexEntry is only considered multikey if the particular path represented by - // this IndexEntry has a multikey path component. For instance, suppose we have index {$**: - // 1} with "a" as the only multikey path. If we have a query on paths "a.b" and "c.d", then - // we will generate two expanded index entries: one for "a.b" and "c.d". The "a.b" entry - // will be marked as multikey because "a" is multikey, whereas the "c.d" entry will not be - // marked as multikey. - invariant(multikeyPaths.size() == 1u); - const bool isMultikey = !multikeyPaths[0].empty(); - - IndexEntry entry(BSON(fieldName << wildcardIndex.keyPattern.firstElement()), - IndexType::INDEX_WILDCARD, - isMultikey, - std::move(multikeyPaths), - // Expanded index entries always use the fixed-size multikey paths - // representation, so we purposefully discard 'multikeyPathSet'. - {}, - true, // sparse - false, // unique - {wildcardIndex.identifier.catalogName, fieldName}, - wildcardIndex.filterExpr, - wildcardIndex.infoObj, - wildcardIndex.collator); - - invariant("$_path"_sd != fieldName); - out->push_back(std::move(entry)); - } -} - bool canUseWildcardIndex(BSONElement elt, MatchExpression::MatchType matchType) { if (elt.type() == BSONType::Object) { // $** indices break nested objects into separate keys, which means we can't naturally @@ -378,7 +308,7 @@ std::vector<IndexEntry> QueryPlannerIXSelect::expandIndexes( std::vector<IndexEntry> out; for (auto&& entry : relevantIndices) { if (entry.type == IndexType::INDEX_WILDCARD) { - expandIndex(entry, fields, &out); + wcp::expandWildcardIndexEntry(entry, fields, &out); } else { out.push_back(entry); } diff --git a/src/mongo/db/query/planner_wildcard_helpers.cpp b/src/mongo/db/query/planner_wildcard_helpers.cpp index d928fc47638..1c0610823be 100644 --- a/src/mongo/db/query/planner_wildcard_helpers.cpp +++ b/src/mongo/db/query/planner_wildcard_helpers.cpp @@ -35,27 +35,14 @@ #include <vector> #include "mongo/bson/util/builder.h" +#include "mongo/db/index/wildcard_key_generator.h" +#include "mongo/db/query/index_bounds.h" #include "mongo/util/log.h" namespace mongo { namespace wildcard_planning { namespace { /** - * Returns an OIL that called 'name', with just the interval [{}, []). - */ -OrderedIntervalList getAllObjectsOIL(const std::string& name) { - OrderedIntervalList oil(name); - BSONObjBuilder bob; - bob.appendMinForType("", BSONType::Object); - bob.appendMaxForType("", BSONType::Object); - oil.intervals.push_back(IndexBoundsBuilder::makeRangeInterval( - bob.obj(), BoundInclusion::kExcludeBothStartAndEndKeys)); - - return oil; -} - - -/** * Compares the path 'fieldNameOrArrayIndexPath' to 'staticComparisonPath', ignoring any array * indices present in the former if they are not present in the latter. The 'multikeyPathComponents' * set contains the path positions that are known to be arrays; only numerical path components that @@ -114,7 +101,9 @@ bool fieldNameOrArrayIndexPathSetContains(const std::set<FieldRef>& multikeyPath /** * Returns the positions of all path components in 'queryPath' that may be interpreted as array * indices by the query system. We obtain this list by finding all multikey path components that - * have a numerical path component immediately after. + * have a numerical path component immediately after. Note that the 'queryPath' argument may be a + * prefix of the full path used to generate 'multikeyPaths', and so we must avoid checking path + * components beyond the end of 'queryPath'. */ std::vector<size_t> findArrayIndexPathComponents(const std::set<std::size_t>& multikeyPaths, const FieldRef& queryPath) { @@ -149,8 +138,11 @@ FieldRef pathWithoutSpecifiedComponents(const FieldRef& path, } return FieldRef{ss.str()}; } -} // namespace +/** + * Returns a MultikeyPaths which indicates which components of 'indexedPath' are multikey, by + * looking up multikeyness in 'multikeyPathSet'. + */ MultikeyPaths buildMultiKeyPathsForExpandedWildcardIndexEntry( const FieldRef& indexedPath, const std::set<FieldRef>& multikeyPathSet) { FieldRef pathToLookup; @@ -193,44 +185,25 @@ std::set<FieldRef> generateFieldNameOrArrayIndexPathSet(const std::set<std::size return paths; } -BoundsTightness translateWildcardIndexBoundsAndTightness(const IndexEntry& index, - BoundsTightness tightnessIn, - OrderedIntervalList* oil) { - // This method should only ever be called for a $** IndexEntry. We expect to be called during - // planning, *before* finishWildcardIndexScanNode has been invoked. The IndexEntry should thus - // only have a single keyPattern field and multikeyPath entry, but this is sufficient to - // determine whether it will be necessary to adjust the tightness. - invariant(index.type == IndexType::INDEX_WILDCARD); - invariant(index.keyPattern.nFields() == 1); - invariant(index.multikeyPaths.size() == 1); - invariant(oil); - - auto* intervals = &oil->intervals; - - // If our bounds include any objects -- that is, anything in the range [{}, []) -- then we will - // need to use "subpath bounds", and include the additional interval ["path.","path/") on - // "$_path". - if (requiresSubpathBounds(*oil) && !intervals->front().isMinToMax()) { - // If we require subpath bounds (and aren't already using bounds from MinKey to MaxKey), - // then we must use [MinKey, MaxKey] bounds for the value. For example, say our value - // bounds are [[MinKey, undefined), (null, MaxKey]] (the bounds generated by a {$ne: null} - // query). Our result set should include documents such as {a: {b: null}}. However, the - // index key for this object will be {"": "a.b", "": null}. This means if we were to use - // the original bounds, we would miss this document. We therefore expand the value bounds - // to included everything. We must also adjust the tightness to be inexact, to avoid false - // positives. - *intervals = {IndexBoundsBuilder::allValues()}; - return BoundsTightness::INEXACT_FETCH; - } - - // If the query passes through any array indices, we must always fetch and filter the documents. - const auto arrayIndicesTraversedByQuery = findArrayIndexPathComponents( - index.multikeyPaths.front(), FieldRef{index.keyPattern.firstElementFieldName()}); - - // If the list of array indices we traversed is non-empty, set the tightness to INEXACT_FETCH. - return (arrayIndicesTraversedByQuery.empty() ? tightnessIn : BoundsTightness::INEXACT_FETCH); -} - +/** + * Returns false if 'queryPath' includes any numerical path components which render it unanswerable + * by the $** index, true otherwise. Specifically, the $** index cannot answer the query if either + * of the following scenarios occur: + * + * - The query path traverses through more than 'kWildcardMaxArrayIndexTraversalDepth' nested arrays + * via explicit array indices. + * - The query path lies along a $** projection through an array index. + * + * For an example of the latter case, say that our query path is 'a.0.b', our projection includes + * {'a.0': 1}, and 'a' is multikey. The query semantics will match 'a.0.b' by either field name or + * array index against the documents, but because the $** index always projects numeric path + * components strictly as field names, the projection {'a.0': 1} cannot correctly support this + * query. + * + * To see why, consider the document {a: [1, 2, 3]}. Query {'a.0': 1} will match this document, but + * the projection {'a.0': 1} will produce output document {a: []}, and so we will not index any of + * the values [1, 2, 3] for 'a'. + */ bool validateNumericPathComponents(const MultikeyPaths& multikeyPaths, const std::set<FieldRef>& includedPaths, const FieldRef& queryPath) { @@ -275,11 +248,228 @@ bool validateNumericPathComponents(const MultikeyPaths& multikeyPaths, return arrayIndices[0] >= includePath->numParts(); } -bool requiresSubpathBounds(const OrderedIntervalList& oil) { - // For intersectize() to work, the OILs' names must match. - OrderedIntervalList allObjects = getAllObjectsOIL(oil.name); - IndexBoundsBuilder::intersectize(oil, &allObjects); - return !allObjects.intervals.empty(); +/** + * Queries whose bounds overlap the Object type bracket may require special handling, since the $** + * index does not index complete objects but instead only contains the leaves along each of its + * subpaths. Since we ban all object-value queries except those on the empty object {}, this will + * typically only be relevant for bounds involving MinKey and MaxKey, such as {$exists: true}. + */ +bool boundsOverlapObjectTypeBracket(const OrderedIntervalList& oil) { + // Create an Interval representing the subrange ({}, []) of the object type bracket. We exclude + // both ends of the bracket because $** indexes support queries on empty objects and arrays. + static const Interval objectTypeBracketBounds = []() { + BSONObjBuilder objBracketBounds; + objBracketBounds.appendMinForType("", BSONType::Object); + objBracketBounds.appendMaxForType("", BSONType::Object); + return IndexBoundsBuilder::makeRangeInterval(objBracketBounds.obj(), + BoundInclusion::kExcludeBothStartAndEndKeys); + }(); + + // Determine whether any of the ordered intervals overlap with the object type bracket. If the + // current interval precedes the bracket, we must check the next interval in sequence. If the + // interval succeeds the bracket then we can stop checking, since the ordered list is never + // descending. If we neither precede nor succeed the object type bracket, then we overlap it. + invariant(oil.computeDirection() != Interval::Direction::kDirectionDescending); + for (const auto& interval : oil.intervals) { + switch (interval.compare(objectTypeBracketBounds)) { + case Interval::IntervalComparison::INTERVAL_PRECEDES_COULD_UNION: + case Interval::IntervalComparison::INTERVAL_PRECEDES: + // Break out of the switch and proceed to check the next interval. + break; + + case Interval::IntervalComparison::INTERVAL_SUCCEEDS: + return false; + + default: + return true; + } + } + // If we're here, then all the OIL's bounds precede the object type bracket. + return false; +} +} // namespace + +void expandWildcardIndexEntry(const IndexEntry& wildcardIndex, + const stdx::unordered_set<std::string>& fields, + std::vector<IndexEntry>* out) { + invariant(out); + invariant(wildcardIndex.type == INDEX_WILDCARD); + // Should only have one field of the form {"path.$**" : 1}. + invariant(wildcardIndex.keyPattern.nFields() == 1); + invariant(wildcardIndex.keyPattern.firstElement().fieldNameStringData().endsWith("$**")); + + // $** indexes do not keep the multikey metadata inside the index catalog entry, as the amount + // of metadata is not bounded. We do not expect IndexEntry objects for $** indexes to have a + // fixed-size vector of multikey metadata until after they are expanded. + invariant(wildcardIndex.multikeyPaths.empty()); + + const auto projExec = WildcardKeyGenerator::createProjectionExec( + wildcardIndex.keyPattern, wildcardIndex.infoObj.getObjectField("wildcardProjection")); + + const auto projectedFields = projExec->applyProjectionToFields(fields); + + const auto& includedPaths = projExec->getExhaustivePaths(); + + out->reserve(out->size() + projectedFields.size()); + for (auto&& fieldName : projectedFields) { + // Convert string 'fieldName' into a FieldRef, to better facilitate the subsequent checks. + auto queryPath = FieldRef{fieldName}; + // $** indices hold multikey metadata directly in the index keys, rather than in the index + // catalog. In turn, the index key data is used to produce a set of multikey paths + // in-memory. Here we convert this set of all multikey paths into a MultikeyPaths vector + // which will indicate to the downstream planning code which components of 'fieldName' are + // multikey. + auto multikeyPaths = buildMultiKeyPathsForExpandedWildcardIndexEntry( + queryPath, wildcardIndex.multikeyPathSet); + + // Check whether a query on the current fieldpath is answerable by the $** index, given any + // numerical path components that may be present in the path string. + if (!validateNumericPathComponents(multikeyPaths, includedPaths, queryPath)) { + continue; + } + + // The expanded IndexEntry is only considered multikey if the particular path represented by + // this IndexEntry has a multikey path component. For instance, suppose we have index {$**: + // 1} with "a" as the only multikey path. If we have a query on paths "a.b" and "c.d", then + // we will generate two expanded index entries: one for "a.b" and "c.d". The "a.b" entry + // will be marked as multikey because "a" is multikey, whereas the "c.d" entry will not be + // marked as multikey. + invariant(multikeyPaths.size() == 1u); + const bool isMultikey = !multikeyPaths[0].empty(); + + IndexEntry entry(BSON(fieldName << wildcardIndex.keyPattern.firstElement()), + IndexType::INDEX_WILDCARD, + isMultikey, + std::move(multikeyPaths), + // Expanded index entries always use the fixed-size multikey paths + // representation, so we purposefully discard 'multikeyPathSet'. + {}, + true, // sparse + false, // unique + {wildcardIndex.identifier.catalogName, fieldName}, + wildcardIndex.filterExpr, + wildcardIndex.infoObj, + wildcardIndex.collator); + + invariant("$_path"_sd != fieldName); + out->push_back(std::move(entry)); + } +} + +BoundsTightness translateWildcardIndexBoundsAndTightness(const IndexEntry& index, + BoundsTightness tightnessIn, + OrderedIntervalList* oil) { + // This method should only ever be called for a $** IndexEntry. We expect to be called during + // planning, *before* finishWildcardIndexScanNode has been invoked. The IndexEntry should thus + // only have a single keyPattern field and multikeyPath entry, but this is sufficient to + // determine whether it will be necessary to adjust the tightness. + invariant(index.type == IndexType::INDEX_WILDCARD); + invariant(index.keyPattern.nFields() == 1); + invariant(index.multikeyPaths.size() == 1); + invariant(oil); + + // If our bounds include any objects -- anything in the range ({}, []) -- then we will need to + // use subpath bounds; that is, we will add the interval ["path.","path/") at the point where we + // finalize the index scan. If the subpath interval is required but the bounds do not already + // run from MinKey to MaxKey, then we must expand them to [MinKey, MaxKey]. Consider the case + // where out bounds are [[MinKey, undefined), (null, MaxKey]] as generated by {$ne: null}. Our + // result set should include documents such as {a: {b: null}}; however, the wildcard index key + // for this object will be {"": "a.b", "": null}, which means that the original bounds would + // skip this document. We must also set the tightness to INEXACT_FETCH to avoid false positives. + if (boundsOverlapObjectTypeBracket(*oil) && !oil->intervals.front().isMinToMax()) { + oil->intervals = {IndexBoundsBuilder::allValues()}; + return BoundsTightness::INEXACT_FETCH; + } + + // If the query passes through any array indices, we must always fetch and filter the documents. + const auto arrayIndicesTraversedByQuery = findArrayIndexPathComponents( + index.multikeyPaths.front(), FieldRef{index.keyPattern.firstElementFieldName()}); + + // If the list of array indices we traversed is non-empty, set the tightness to INEXACT_FETCH. + return (arrayIndicesTraversedByQuery.empty() ? tightnessIn : BoundsTightness::INEXACT_FETCH); +} + +void finalizeWildcardIndexScanConfiguration(IndexEntry* index, IndexBounds* bounds) { + // We should only ever reach this point when processing a $** index. Sanity check the arguments. + invariant(index && index->type == IndexType::INDEX_WILDCARD); + invariant(index->keyPattern.nFields() == 1); + invariant(index->multikeyPaths.size() == 1); + invariant(bounds && bounds->fields.size() == 1); + invariant(bounds->fields.front().name == index->keyPattern.firstElementFieldName()); + + // For $** indexes, the IndexEntry key pattern is {'path.to.field': ±1} but the actual keys in + // the index are of the form {'$_path': ±1, 'path.to.field': ±1}, where the value of the first + // field in each key is 'path.to.field'. We push a new entry into the bounds vector for the + // leading '$_path' bound here. We also push corresponding fields into the IndexScanNode's + // keyPattern and its multikeyPaths vector. + index->multikeyPaths.insert(index->multikeyPaths.begin(), std::set<std::size_t>{}); + bounds->fields.insert(bounds->fields.begin(), {"$_path"}); + index->keyPattern = + BSON("$_path" << index->keyPattern.firstElement() << index->keyPattern.firstElement()); + + // Create a FieldRef to perform any necessary manipulations on the query path string. + FieldRef queryPath{std::next(index->keyPattern.begin())->fieldNameStringData()}; + auto& multikeyPaths = index->multikeyPaths.back(); + + // If the bounds overlap the object type bracket, then we must retrieve all documents which + // include the given path. We must therefore add bounds that encompass all its subpaths, + // specifically the interval ["path.","path/") on "$_path". + const bool requiresSubpathBounds = boundsOverlapObjectTypeBracket(bounds->fields.back()); + + // Helper function to check whether the final path component in 'queryPath' is an array index. + const auto lastFieldIsArrayIndex = [&multikeyPaths](const auto& queryPath) { + return (queryPath.numParts() > 1u && multikeyPaths.count(queryPath.numParts() - 2u) && + queryPath.isNumericPathComponentStrict(queryPath.numParts() - 1u)); + }; + + // If subpath bounds are needed, we build a range interval on all subpaths of the query path(s). + // We must therefore trim any trailing array indices from the query path before generating the + // fieldname-or-array power set, in order to avoid overlapping the final set of bounds. For + // instance, the untrimmed query path 'a.0' will produce paths 'a' and 'a.0' if 'a' is multikey, + // and so we would end up with bounds [['a','a'], ['a.','a/'], ['a.0','a.0'], ['a.0.','a.0/']]. + // The latter two are subsets of the ['a.', 'a/'] interval. + while (requiresSubpathBounds && lastFieldIsArrayIndex(queryPath)) { + queryPath.removeLastPart(); + } + + // Account for fieldname-or-array-index semantics. $** indexes do not explicitly encode array + // indices in their keys, so if this query traverses one or more multikey fields via an array + // index (e.g. query 'a.0.b' where 'a' is an array), then we must generate bounds on all array- + // and non-array permutations of the path in order to produce INEXACT_FETCH bounds. + auto paths = generateFieldNameOrArrayIndexPathSet(multikeyPaths, queryPath); + + // Add a $_path point-interval for each path that needs to be traversed in the index. If subpath + // bounds are required, then we must add a further range interval on ["path.","path/"). + static const char subPathStart = '.', subPathEnd = static_cast<char>('.' + 1); + auto& pathIntervals = bounds->fields.front().intervals; + for (const auto& fieldPath : paths) { + auto path = fieldPath.dottedField().toString(); + pathIntervals.push_back(IndexBoundsBuilder::makePointInterval(path)); + if (requiresSubpathBounds) { + pathIntervals.push_back(IndexBoundsBuilder::makeRangeInterval( + path + subPathStart, path + subPathEnd, BoundInclusion::kIncludeStartKeyOnly)); + } + } + // Ensure that the bounds' intervals are correctly aligned. + IndexBoundsBuilder::alignBounds(bounds, index->keyPattern); +} + +bool isWildcardObjectSubpathScan(const IndexScanNode* node) { + // If the node is not a $** index scan, return false immediately. + if (!node || node->index.type != IndexType::INDEX_WILDCARD) { + return false; + } + + // We expect consistent arguments, representing a $** index which has already been finalized. + invariant(node->index.keyPattern.nFields() == 2); + invariant(node->index.multikeyPaths.size() == 2); + invariant(node->bounds.fields.size() == 2); + invariant(node->bounds.fields.front().name == node->index.keyPattern.firstElementFieldName()); + invariant(node->bounds.fields.back().name == + std::next(node->index.keyPattern.begin())->fieldName()); + + // Check the bounds on the query field for any intersections with the object type bracket. + return boundsOverlapObjectTypeBracket(node->bounds.fields.back()); } } // namespace wildcard_planning diff --git a/src/mongo/db/query/planner_wildcard_helpers.h b/src/mongo/db/query/planner_wildcard_helpers.h index ddd0185d7c3..f05c475295f 100644 --- a/src/mongo/db/query/planner_wildcard_helpers.h +++ b/src/mongo/db/query/planner_wildcard_helpers.h @@ -33,6 +33,7 @@ #include "mongo/db/field_ref.h" #include "mongo/db/index/multikey_paths.h" #include "mongo/db/query/index_bounds_builder.h" +#include "mongo/db/query/index_entry.h" #include "mongo/db/query/query_solution.h" namespace mongo { @@ -42,27 +43,17 @@ using BoundsTightness = IndexBoundsBuilder::BoundsTightness; /** * Specifies the maximum depth of nested array indices through which a query may traverse before a - * $** declines to answer it, due to the exponential complexity of the bounds required. + * $** index declines to answer it, due to the exponential complexity of the bounds required. */ static constexpr size_t kWildcardMaxArrayIndexTraversalDepth = 8u; /** - * Returns a MultikeyPaths which indicates which components of 'indexedPath' are multikey, by - * looking up multikeyness in 'multikeyPathSet'. + * Given a single wildcard index, and a set of fields which are being queried, create a 'mock' + * IndexEntry for each of the query fields and add them into the provided vector. */ -MultikeyPaths buildMultiKeyPathsForExpandedWildcardIndexEntry( - const FieldRef& indexedPath, const std::set<FieldRef>& multikeyPathSet); - -/** - * Given a query path and the set of known multikey path components, this function outputs the power - * set of paths generated by considering each relevant numerical path component first as a literal - * fieldname, and then as an array index. In other words, for each numerical path component that - * immediately follows a multikey field, we will output one path with the numerical component and - * one path without. If both 'a' and 'b' are multikey, then the queryPath 'a.0.b.1.c' will produce - * ['a.0.b.1.c', 'a.0.b.c', 'a.b.1.c', 'a.b.c']. - */ -std::set<FieldRef> generateFieldNameOrArrayIndexPathSet(const std::set<std::size_t>& multikeyPaths, - const FieldRef& queryPath); +void expandWildcardIndexEntry(const IndexEntry& wildcardIndex, + const stdx::unordered_set<std::string>& fields, + std::vector<IndexEntry>* out); /** * In certain circumstances, it is necessary to adjust the bounds and tightness generated by the @@ -76,27 +67,21 @@ BoundsTightness translateWildcardIndexBoundsAndTightness(const IndexEntry& index OrderedIntervalList* oil); /** - * Returns false if 'queryPath' includes any numerical path components which render it unanswerable - * by the $** index, true otherwise. Specifically, the $** index cannot answer the query if either - * of the following scenarios occur: - * - * - The query path traverses through more than 'kWildcardMaxArrayIndexTraversalDepth' nested arrays - * via explicit array indices. - * - The query path lies along a $** projection through an array index. - * - * For an example of the latter case, say that our query path is 'a.0.b', our projection includes - * {'a.0': 1}, and 'a' is multikey. The query semantics will match 'a.0.b' by either field name or - * array index against the documents, but because the $** index always projects numeric path - * components strictly as field names, the projection {'a.0': 1} cannot correctly support this - * query. - * - * To see why, consider the document {a: [1, 2, 3]}. Query {'a.0': 1} will match this document, but - * the projection {'a.0': 1} will produce output document {a: []}, and so we will not index any of - * the values [1, 2, 3] for 'a'. + * During planning, the expanded $** IndexEntry's keyPattern and bounds are in the single-field + * format {'path': 1}. Once planning is complete, it is necessary to call this method in order to + * prepare the IndexEntry and bounds for execution. This function performs the following actions: + * - Converts the keyPattern to the {$_path: 1, "path": 1} format expected by the $** index. + * - Adds a new entry '$_path' to the bounds vector, and computes the necessary intervals on it. + * - Adds a new, empty entry to 'multikeyPaths' for '$_path'. + */ +void finalizeWildcardIndexScanConfiguration(IndexEntry* index, IndexBounds* bounds); + +/** + * Returns true if the given IndexScanNode is a $** scan whose bounds overlap the object type + * bracket. Scans whose bounds include the object bracket have certain limitations for planning + * purposes; for instance, they cannot provide covered results or be converted to DISTINCT_SCAN. */ -bool validateNumericPathComponents(const MultikeyPaths& multikeyPaths, - const std::set<FieldRef>& includedPaths, - const FieldRef& queryPath); +bool isWildcardObjectSubpathScan(const IndexScanNode* node); /** * Return true if the intervals on the 'value' field will include subobjects, and diff --git a/src/mongo/db/query/query_planner_wildcard_index_test.cpp b/src/mongo/db/query/query_planner_wildcard_index_test.cpp index 5bc23eb2892..025f1ce9fa5 100644 --- a/src/mongo/db/query/query_planner_wildcard_index_test.cpp +++ b/src/mongo/db/query/query_planner_wildcard_index_test.cpp @@ -28,10 +28,14 @@ #include "mongo/platform/basic.h" +#include "mongo/db/query/planner_wildcard_helpers.h" #include "mongo/db/query/query_planner_test_fixture.h" +#include "mongo/unittest/death_test.h" namespace mongo { +namespace wcp = ::mongo::wildcard_planning; + const std::string kIndexName = "indexName"; /** @@ -77,6 +81,24 @@ protected: }; // +// General planning tests. +// + +DEATH_TEST_F(QueryPlannerWildcardTest, CannotExpandPreExpandedWildcardIndexEntry, "Invariant") { + addIndex(BSON("$**" << 1)); + ASSERT_EQ(params.indices.size(), 2U); + + // Expand the $** index and add the expanded entry to the list of available indices. + std::vector<IndexEntry> expandedIndex; + wcp::expandWildcardIndexEntry(params.indices.back(), {"a"}, &expandedIndex); + ASSERT_EQ(expandedIndex.size(), 1U); + params.indices.push_back(expandedIndex.front()); + + // Now run a query. This will invariant when the planner expands the expanded index. + runQuery(fromjson("{a: 1}")); +} + +// // Null comparison and existence tests. // @@ -619,6 +641,10 @@ TEST_F(QueryPlannerWildcardTest, InBasic) { "bounds: {'$_path': [['a','a',true,true]], a: [[1,1,true,true],[2,2,true,true]]}}}}}"); } +// +// Array tests. +// + TEST_F(QueryPlannerWildcardTest, EqualsEmptyArray) { addWildcardIndex(BSON("$**" << 1)); runQuery(fromjson("{a: []}")); @@ -651,6 +677,10 @@ TEST_F(QueryPlannerWildcardTest, EqualsArrayWithValue) { assertHasOnlyCollscan(); } +// +// $in tests. +// + TEST_F(QueryPlannerWildcardTest, InEmptyArray) { addWildcardIndex(BSON("$**" << 1)); runQuery(fromjson("{a: {$in: [[]]}}")); @@ -696,6 +726,10 @@ TEST_F(QueryPlannerWildcardTest, InBasicOrEquivalent) { "bounds: {'$_path': [['a','a',true,true]], a: [[1,1,true,true],[2,2,true,true]]}}}}}"); } +// +// Partial index tests. +// + TEST_F(QueryPlannerWildcardTest, PartialIndexCanAnswerPredicateOnFilteredField) { auto filterObj = fromjson("{a: {$gt: 0}}"); auto filterExpr = QueryPlannerTest::parseMatchExpression(filterObj); @@ -877,6 +911,10 @@ TEST_F(QueryPlannerWildcardTest, WildcardIndexDoesNotSupplyCandidatePlanForTextS "{fetch: {filter: {b: 10}, node: {text: {prefix: {a: 10}, search: 'banana'}}}}"); } +// +// Negation tests. +// + TEST_F(QueryPlannerWildcardTest, WildcardDoesNotSupportNegationPredicate) { // Wildcard indexes can't support negation queries because they are sparse, and {a: {$ne: 5}} // will match documents which don't have an "a" field. @@ -1005,6 +1043,10 @@ TEST_F(QueryPlannerTest, MultipleWildcardIndexesHintWithPartialFilter) { assertNumSolutions(0U); } +// +// Object comparison tests. +// + TEST_F(QueryPlannerWildcardTest, WildcardIndexesDoNotSupportObjectEquality) { addIndex(BSON("$**" << 1)); @@ -1053,6 +1095,10 @@ TEST_F(QueryPlannerWildcardTest, WildcardIndexesDoNotSupportObjectInequality) { assertSolutionExists("{fetch: {node: {ixscan: {pattern: {$_path: 1, x: 1}}}}}"); } +// +// Unsupported values tests. +// + TEST_F(QueryPlannerWildcardTest, WildcardIndexesDoNotSupportInWithUnsupportedValues) { addIndex(BSON("$**" << 1)); @@ -1096,6 +1142,10 @@ TEST_F(QueryPlannerWildcardTest, WildcardIndexesDoNotSupportElemMatchObject) { assertHasOnlyCollscan(); } +// +// Sorting tests. +// + TEST_F(QueryPlannerWildcardTest, WildcardIndexCanProvideNonBlockingSort) { addWildcardIndex(BSON("$**" << 1)); @@ -1767,6 +1817,4 @@ TEST_F(QueryPlannerWildcardTest, ContainedOrPushdownWorksWithWildcardIndex) { "{$_path: [['a','a',true,true]], a:[[1,1,true,true]]}}}}}"); } -// TODO SERVER-36517: Add testing for DISTINCT_SCAN. - } // namespace mongo diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index 567cb958e0a..6ba95a37232 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -38,12 +38,15 @@ #include "mongo/db/query/collation/collation_index_key.h" #include "mongo/db/query/index_bounds_builder.h" #include "mongo/db/query/planner_analysis.h" +#include "mongo/db/query/planner_wildcard_helpers.h" #include "mongo/db/query/query_planner_common.h" namespace mongo { namespace { +namespace wcp = ::mongo::wildcard_planning; + // Create an ordred interval list which represents the bounds for all BSON elements of type String, // Object, or Array. OrderedIntervalList buildStringBoundsOil(const std::string& keyName) { @@ -536,6 +539,12 @@ void IndexScanNode::appendToString(mongoutils::str::stream* ss, int indent) cons } bool IndexScanNode::hasField(const string& field) const { + // A $** index whose bounds overlap the object type bracket cannot provide covering, since the + // index only contains the leaf nodes along each of the object's subpaths. + if (index.type == IndexType::INDEX_WILDCARD && wcp::isWildcardObjectSubpathScan(this)) { + return false; + } + // The index is multikey but does not have any path-level multikeyness information. Such indexes // can never provide covering. if (index.multikey && index.multikeyPaths.empty()) { diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 3eb550761c9..ffd0985a679 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -884,11 +884,13 @@ struct DistinctNode : public QuerySolutionNode { BSONObjSet sorts; IndexEntry index; - int direction; IndexBounds bounds; + const CollatorInterface* queryCollator; + // We are distinct-ing over the 'fieldNo'-th field of 'index.keyPattern'. - int fieldNo; + int fieldNo{0}; + int direction{1}; }; /** diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp index d45f6592308..f368a4699f8 100644 --- a/src/mongo/db/query/stage_builder.cpp +++ b/src/mongo/db/query/stage_builder.cpp @@ -311,15 +311,22 @@ PlanStage* buildStages(OperationContext* opCtx, return nullptr; } - DistinctParams params; - - params.descriptor = collection->getIndexCatalog()->findIndexByName( + auto descriptor = collection->getIndexCatalog()->findIndexByName( opCtx, dn->index.identifier.catalogName); - invariant(params.descriptor); - params.direction = dn->direction; + invariant(descriptor); + + // We use the node's internal name, keyPattern and multikey details here. For $** + // indexes, these may differ from the information recorded in the index's descriptor. + DistinctParams params{*descriptor, + dn->index.identifier.catalogName, + dn->index.keyPattern, + dn->index.multikeyPaths, + dn->index.multikey}; + + params.scanDirection = dn->direction; params.bounds = dn->bounds; params.fieldNo = dn->fieldNo; - return new DistinctScan(opCtx, params, ws); + return new DistinctScan(opCtx, std::move(params), ws); } case STAGE_COUNT_SCAN: { const CountScanNode* csn = static_cast<const CountScanNode*>(root); diff --git a/src/mongo/dbtests/query_stage_distinct.cpp b/src/mongo/dbtests/query_stage_distinct.cpp index 1fb9aad1350..d4bbd8a38b8 100644 --- a/src/mongo/dbtests/query_stage_distinct.cpp +++ b/src/mongo/dbtests/query_stage_distinct.cpp @@ -130,9 +130,8 @@ public: coll->getIndexCatalog()->findIndexesByKeyPattern(&_opCtx, BSON("a" << 1), false, &indexes); ASSERT_EQ(indexes.size(), 1U); - DistinctParams params; - params.descriptor = indexes[0]; - params.direction = 1; + DistinctParams params{&_opCtx, *indexes[0]}; + params.scanDirection = 1; // Distinct-ing over the 0-th field of the keypattern. params.fieldNo = 0; // We'll look at all values in the bounds. @@ -142,7 +141,7 @@ public: params.bounds.fields.push_back(oil); WorkingSet ws; - DistinctScan distinct(&_opCtx, params, &ws); + DistinctScan distinct(&_opCtx, std::move(params), &ws); WorkingSetID wsid; // Get our first result. @@ -197,12 +196,11 @@ public: coll->getIndexCatalog()->findIndexesByKeyPattern(&_opCtx, BSON("a" << 1), false, &indexes); verify(indexes.size() == 1); - DistinctParams params; - params.descriptor = indexes[0]; - ASSERT_TRUE(params.descriptor->isMultikey(&_opCtx)); + DistinctParams params{&_opCtx, *indexes[0]}; + ASSERT_TRUE(params.isMultiKey); - verify(params.descriptor); - params.direction = 1; + verify(params.accessMethod); + params.scanDirection = 1; // Distinct-ing over the 0-th field of the keypattern. params.fieldNo = 0; // We'll look at all values in the bounds. @@ -212,7 +210,7 @@ public: params.bounds.fields.push_back(oil); WorkingSet ws; - DistinctScan distinct(&_opCtx, params, &ws); + DistinctScan distinct(&_opCtx, std::move(params), &ws); // We should see each number in the range [1, 6] exactly once. std::set<int> seen; @@ -266,11 +264,9 @@ public: &_opCtx, BSON("a" << 1 << "b" << 1), false, &indices); ASSERT_EQ(1U, indices.size()); - DistinctParams params; - params.descriptor = indices[0]; - ASSERT_TRUE(params.descriptor); + DistinctParams params{&_opCtx, *indices[0]}; - params.direction = 1; + params.scanDirection = 1; params.fieldNo = 1; params.bounds.isSimpleRange = false; @@ -283,7 +279,7 @@ public: params.bounds.fields.push_back(bOil); WorkingSet ws; - DistinctScan distinct(&_opCtx, params, &ws); + DistinctScan distinct(&_opCtx, std::move(params), &ws); WorkingSetID wsid; PlanStage::StageState state; |