From 54a41757d77002b015c7bceca87c59f35e5a564f Mon Sep 17 00:00:00 2001 From: Alya Berciu Date: Wed, 24 Feb 2021 16:44:23 +0000 Subject: SERVER-55065: Cover null queries with index where possible --- jstests/core/cover_null_queries.js | 338 ++++++++++++++++++++++++ src/mongo/db/exec/count_scan.cpp | 2 + src/mongo/db/query/get_executor.cpp | 101 ++++++- src/mongo/db/query/index_bounds_builder.cpp | 18 +- src/mongo/db/query/index_bounds_builder.h | 9 + src/mongo/db/query/planner_access.cpp | 76 +++++- src/mongo/db/query/planner_access.h | 7 +- src/mongo/db/query/query_planner_index_test.cpp | 44 +++ 8 files changed, 575 insertions(+), 20 deletions(-) create mode 100644 jstests/core/cover_null_queries.js diff --git a/jstests/core/cover_null_queries.js b/jstests/core/cover_null_queries.js new file mode 100644 index 00000000000..cfa04925b27 --- /dev/null +++ b/jstests/core/cover_null_queries.js @@ -0,0 +1,338 @@ +/** + * Test to verify that null queries can be fully covered by an index. + * @tags: [assumes_unsharded_collection, requires_fcv_49, requires_non_retryable_writes] + */ +(function() { +"use strict"; + +load("jstests/aggregation/extras/utils.js"); // For arrayEq(). +load("jstests/libs/analyze_plan.js"); // For getAggPlanStages() and getPlanStages(). + +const coll = db.cover_null_queries; +coll.drop(); + +assert.commandWorked(coll.insertMany([ + {_id: 1, a: 1, b: 1}, + {_id: 2, a: 1, b: null}, + {_id: 3, a: null, b: 1}, + {_id: 4, a: null, b: null}, + {_id: 5, a: 2}, + {_id: 6, b: 2}, + {_id: 7}, +])); + +/** + * Validates that the explain() of command 'cmdObj' has the stages in 'expectedStages'. + * + * The field 'expectedStages' should be specified as an object with keys matching the stages + * expected in the explain output and values indicating how many times each stage should be expected + * to appear in the output (useful to verify if there are two COUNT_SCANS or that there are 0 + * IX_SCANS, for example). + * + * The field 'isAgg' is a boolean indicating whether or not the command is an aggregation. + */ +function validateStages({cmdObj, expectedStages, isAgg}) { + const explainObj = assert.commandWorked(coll.runCommand({explain: cmdObj})); + for (const [expectedStage, count] of Object.entries(expectedStages)) { + const planStages = isAgg + ? getAggPlanStages(explainObj, expectedStage, /* useQueryPlannerSection */ true) + : getPlanStages(explainObj, expectedStage); + assert.eq(planStages.length, count, planStages); + if (count > 0) { + for (const planStage of planStages) { + assert.eq(planStage.stage, expectedStage, planStage); + } + } + } +} + +/** + * Runs find command with the given 'filter' and 'projection' and validates that the output returned + * matches 'expectedOutput'. Also runs explain() command on the same find command and validates that + * all the 'expectedStages' are present in the plan returned. + */ +function validateFindCmdOutputAndPlan({filter, projection, expectedStages, expectedOutput}) { + const cmdObj = {find: coll.getName(), filter: filter, projection: projection}; + if (expectedOutput) { + const res = assert.commandWorked(coll.runCommand(cmdObj)); + const ouputArray = new DBCommandCursor(coll.getDB(), res).toArray(); + assert(arrayEq(expectedOutput, ouputArray), ouputArray); + } + validateStages({cmdObj, expectedStages}); +} + +/** + * Runs count command with the 'filter' and validates that the output returned matches + * 'expectedOutput'. Also runs explain() command and validates that all the 'expectedStages' + * are present in the plan returned. + */ +function validateSimpleCountCmdOutputAndPlan({filter, expectedStages, expectedCount}) { + const cmdObj = {count: coll.getName(), query: filter}; + const res = assert.commandWorked(coll.runCommand(cmdObj)); + assert.eq(res.n, expectedCount); + validateStages({cmdObj, expectedStages}); +} + +/** + * Runs an aggregation with a $count stage with the 'filter' applied to the $match stage and + * validates that the count returned matches 'expectedCount'. Also runs explain() command on the + * and validates that all the 'expectedStages' are present in the plan returned. + */ +function validateCountAggCmdOutputAndPlan({filter, expectedStages, expectedCount, pipeline}) { + const cmdObj = { + aggregate: coll.getName(), + pipeline: pipeline || [{$match: filter}, {$count: "count"}], + cursor: {}, + }; + const cmdRes = assert.commandWorked(coll.runCommand(cmdObj)); + const countRes = cmdRes.cursor.firstBatch; + assert.eq(countRes.length, 1, cmdRes); + assert.eq(countRes[0].count, expectedCount, countRes); + validateStages({cmdObj, expectedStages, isAgg: true}); +} + +assert.commandWorked(coll.createIndex({a: 1, _id: 1})); + +// Verify count({a: null}) can be covered by an index. In the simplest case we can use two count +// scans joined by an OR to evaluate it. +validateSimpleCountCmdOutputAndPlan({ + filter: {a: null}, + expectedCount: 4, + expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0}, +}); + +// Verify $count stage in aggregation matching {a: null} yields the same plan. +validateCountAggCmdOutputAndPlan({ + filter: {a: null}, + expectedCount: 4, + expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0}, +}); + +// Verify find({a: null}, {_id: 1}) can be covered by an index. +validateFindCmdOutputAndPlan({ + filter: {a: null}, + projection: {_id: 1}, + expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}], + expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, +}); + +// Verify that a more complex projection that only relies on the _id field does not need a FETCH. +validateFindCmdOutputAndPlan({ + filter: {a: null}, + projection: {_id: 1, incr_id: {$add: [1, "$_id"]}}, + expectedOutput: + [{_id: 3, incr_id: 4}, {_id: 4, incr_id: 5}, {_id: 6, incr_id: 7}, {_id: 7, incr_id: 8}], + expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_DEFAULT": 1}, +}); + +// Verify that a more complex projection that computes a new field based on _id but excludes the _id +// field does not require a fetch stage. +validateFindCmdOutputAndPlan({ + filter: {a: null}, + projection: {_id: 0, incr_id: {$add: [1, "$_id"]}}, + expectedOutput: [{incr_id: 4}, {incr_id: 5}, {incr_id: 7}, {incr_id: 8}], + expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_DEFAULT": 1}, +}); + +// Verify that a more complex projection that relies on any non-_id field does need a FETCH. +validateFindCmdOutputAndPlan({ + filter: {a: null}, + projection: {_id: 1, incr_id: {$add: ["$a", "$_id"]}}, + expectedOutput: [ + {_id: 3, incr_id: null}, + {_id: 4, incr_id: null}, + {_id: 6, incr_id: null}, + {_id: 7, incr_id: null} + ], + expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_DEFAULT": 1}, +}); + +// Verify that an exclusion projection does need a FETCH. +validateFindCmdOutputAndPlan({ + filter: {a: null}, + projection: {a: 0, b: 0}, + expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}], + expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_DEFAULT": 1}, +}); + +// Verify find({a: null}, {_id: 1, b: 1}) is not covered by an index so we still have a FETCH stage. +validateFindCmdOutputAndPlan({ + filter: {a: null}, + projection: {_id: 1, b: 1}, + expectedOutput: [{_id: 3, b: 1}, {_id: 4, b: null}, {_id: 6, b: 2}, {_id: 7}], + expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, +}); + +// Verify find({a: null}, {a: 1}) still has a FETCH stage because the index alone cannot determine +// if the value of field a is null, undefined, or missing. +validateFindCmdOutputAndPlan({ + filter: {a: null}, + projection: {a: 1}, + expectedOutput: [{_id: 3, a: null}, {_id: 4, a: null}, {_id: 6}, {_id: 7}], + expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, +}); + +// For exclusion projects we always need a FETCH stage. +validateFindCmdOutputAndPlan({ + filter: {a: null}, + projection: {a: 1, _id: 0}, + expectedOutput: [{a: null}, {a: null}, {}, {}], + expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, +}); + +// Verify that if the index is multikey, this optimization cannot be applied, as the index alone +// cannot differentiate between null and []. +assert.commandWorked(coll.insertOne({_id: 8, a: []})); +validateSimpleCountCmdOutputAndPlan({ + filter: {a: null, _id: 3}, + expectedCount: 1, + expectedStages: {"FETCH": 1, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0} +}); +validateCountAggCmdOutputAndPlan({ + filter: {a: null, _id: 3}, + expectedCount: 1, + expectedStages: {"FETCH": 1, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0}, +}); +validateFindCmdOutputAndPlan({ + filter: {a: null, _id: 3}, + projection: {_id: 1}, + expectedOutput: [{_id: 3}], + expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, +}); +assert.commandWorked(coll.deleteOne({_id: 8})); + +// Test case when query is fully covered but we still need to fetch to correctly project a field. +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked(coll.createIndex({a: 1, _id: 1})); +assert.commandWorked(coll.createIndex({a: 1, b: 1, _id: 1})); +validateFindCmdOutputAndPlan({ + filter: {a: null}, + projection: {_id: 1, b: 1}, + expectedOutput: [ + {_id: 3, b: 1}, + {_id: 4, b: null}, + {_id: 6, b: 2}, + {_id: 7}, + ], + expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, +}); + +// Test case when query is fully covered but predicate is not a single interval. +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked(coll.createIndex({a: 1, b: 1, _id: 1})); +validateFindCmdOutputAndPlan({ + filter: {a: {$in: [1, 2, 3]}, b: null}, + projection: {_id: 1}, + expectedOutput: [{_id: 2}, {_id: 5}], + expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, +}); +// Note that we can't use a COUNT_SCAN here because we have a complex interval. +validateSimpleCountCmdOutputAndPlan({ + filter: {a: {$in: [1, 2, 3]}, b: null}, + expectedCount: 2, + expectedStages: {"IXSCAN": 1, "FETCH": 0}, +}); +validateCountAggCmdOutputAndPlan({ + filter: {a: {$in: [1, 2, 3]}, b: null}, + expectedCount: 2, + expectedStages: {"IXSCAN": 1, "FETCH": 0}, +}); + +validateFindCmdOutputAndPlan({ + filter: {a: 1, b: null}, + projection: {_id: 1}, + expectedOutput: [{_id: 2}], + expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, +}); +validateSimpleCountCmdOutputAndPlan({ + filter: {a: 1, b: null}, + expectedCount: 1, + expectedStages: {"OR": 1, "COUNT_SCAN": 2, "FETCH": 0}, +}); +validateCountAggCmdOutputAndPlan({ + filter: {a: 1, b: null}, + expectedCount: 1, + expectedStages: {"OR": 1, "COUNT_SCAN": 2, "FETCH": 0}, +}); + +// Test case when counting nulls where documents are sorted in the opposite direction as the index. +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked(coll.createIndex({a: -1, b: -1})); +validateCountAggCmdOutputAndPlan({ + expectedCount: 2, + expectedStages: {"OR": 1, "COUNT_SCAN": 2, "FETCH": 0}, + pipeline: /* Sort by field a in the opposite direction of the index. */ + [{$match: {a: null, b: {$gt: 0}}}, {$sort: {a: 1}}, {$count: "count"}], +}); + +// Test case when query is fully covered, predicate is not a single interval, and the index does not +// include the _id field. A find projection in this case will not be covered, but any count should +// be covered. +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked(coll.createIndex({a: 1, b: 1})); +validateFindCmdOutputAndPlan({ + filter: {a: {$in: [1, 2, 3]}, b: null}, + projection: {_id: 1}, + expectedOutput: [{_id: 2}, {_id: 5}], + expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, +}); +// Note that we can't use a COUNT_SCAN here because we have a complex interval. +validateSimpleCountCmdOutputAndPlan({ + filter: {a: {$in: [1, 2, 3]}, b: null}, + expectedCount: 2, + expectedStages: {"IXSCAN": 1, "FETCH": 0}, +}); +validateCountAggCmdOutputAndPlan({ + filter: {a: {$in: [1, 2, 3]}, b: null}, + expectedCount: 2, + expectedStages: {"IXSCAN": 1, "FETCH": 0}, +}); + +// Test index intersection plan. +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked(coll.createIndex({a: 1, _id: 1})); +assert.commandWorked(coll.createIndex({b: 1, _id: 1})); +validateFindCmdOutputAndPlan({ + filter: {a: null, b: null}, + projection: {_id: 1}, + expectedOutput: [{_id: 4}, {_id: 7}], + expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, +}); +validateFindCmdOutputAndPlan({ + filter: {a: null, b: 1}, + projection: {_id: 1}, + expectedOutput: [{_id: 3}], + expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, +}); + +// Verify the case where id field is accessed along a dotted path. +// We still need a FETCH in this case because the index cannot differentiate between null and +// missing values within _id, e.g. {_id: {x: null}} vs. {_id: {}} would both be returned as +// {_id: {x: null}} by the index. +assert.commandWorked(coll.dropIndexes()); +assert.commandWorked(coll.deleteMany({})); +assert.commandWorked(coll.createIndex({a: 1, "_id.x": 1})); +assert.commandWorked(coll.insertMany([ + {a: null, _id: {x: 1}}, + {a: null, _id: {x: 1, y: 1}}, + {a: null, _id: {y: 1}}, + {_id: {x: 1, y: 2}}, + {a: "not null", _id: {x: 3}}, +])); +validateFindCmdOutputAndPlan({ + filter: {a: null}, + projection: {"_id.x": 1}, + expectedOutput: [{_id: {x: 1}}, {_id: {x: 1}}, {_id: {}}, {_id: {x: 1}}], + expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_DEFAULT": 1}, +}); +validateSimpleCountCmdOutputAndPlan({ + filter: {a: null}, + expectedCount: 4, + expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0}, +}); +validateCountAggCmdOutputAndPlan({ + filter: {a: null}, + expectedCount: 4, + expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0}, +}); +})(); diff --git a/src/mongo/db/exec/count_scan.cpp b/src/mongo/db/exec/count_scan.cpp index 8174e35ce2e..b37ac05a2d6 100644 --- a/src/mongo/db/exec/count_scan.cpp +++ b/src/mongo/db/exec/count_scan.cpp @@ -151,6 +151,8 @@ PlanStage::StageState CountScan::doWork(WorkingSetID* out) { } WorkingSetID id = _workingSet->allocate(); + WorkingSetMember* member = _workingSet->get(id); + member->recordId = entry->loc; _workingSet->transitionToRecordIdAndObj(id); *out = id; return PlanStage::ADVANCED; diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 92874117844..a18f9e61d21 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -1576,6 +1576,25 @@ StatusWith> getExecutorUpda namespace { +/** + * If the given 'bounds' contains a single null interval return its position, otherwise return + * boost::none. + */ +boost::optional boundsHasExactlyOneNullInterval(const IndexBounds& bounds) { + boost::optional nullFieldNo; + for (size_t fieldNo = 0; fieldNo < bounds.fields.size(); ++fieldNo) { + const OrderedIntervalList& oil = bounds.fields[fieldNo]; + if (IndexBoundsBuilder::isNullInterval(oil)) { + // Return boost::none if we have multiple null intervals. + if (nullFieldNo) { + return boost::none; + } + nullFieldNo = fieldNo; + } + } + return nullFieldNo; +} + /** * Returns 'true' if the provided solution 'soln' can be rewritten to use * a fast counting stage. Mutates the tree in 'soln->root'. @@ -1617,25 +1636,81 @@ bool turnIxscanIntoCount(QuerySolution* soln) { BSONObj endKey; bool endKeyInclusive; + auto makeCountScan = [&isn](BSONObj& csnStartKey, + bool startKeyInclusive, + BSONObj& csnEndKey, + bool endKeyInclusive) { + // Since count scans return no data, they are always forward scans. Index scans, on the + // other hand, may need to scan the index in reverse order in order to obtain a sort. If the + // index scan direction is backwards, then we need to swap the start and end of the count + // scan bounds. + if (isn->direction < 0) { + csnStartKey.swap(csnEndKey); + std::swap(startKeyInclusive, endKeyInclusive); + } + + auto csn = std::make_unique(isn->index); + csn->startKey = csnStartKey; + csn->startKeyInclusive = startKeyInclusive; + csn->endKey = csnEndKey; + csn->endKeyInclusive = endKeyInclusive; + return csn; + }; + if (!IndexBoundsBuilder::isSingleInterval( isn->bounds, &startKey, &startKeyInclusive, &endKey, &endKeyInclusive)) { - return false; - } + // If we have exactly one null interval, we should split the bounds and try to construct + // two COUNT_SCAN stages joined by an OR stage. If we had multiple null intervals, we would + // need 2^N count scans for N intervals, meaning this would quickly explode to a + // point where it would just be more efficient to use a single index scan. Consequently, we + // draw the line at one null interval. + if (auto nullFieldNo = boundsHasExactlyOneNullInterval(isn->bounds)) { + OrderedIntervalList undefinedPointOil, nullPointOil; + undefinedPointOil.intervals.push_back(IndexBoundsBuilder::kUndefinedPointInterval); + nullPointOil.intervals.push_back(IndexBoundsBuilder::kNullPointInterval); + + tassert(5506501, + "The index of the null interval is invalid", + *nullFieldNo < isn->bounds.fields.size()); + auto makeNullBoundsCountScan = + [&](OrderedIntervalList& oil) -> std::unique_ptr { + std::swap(isn->bounds.fields[*nullFieldNo], oil); + ON_BLOCK_EXIT([&] { std::swap(isn->bounds.fields[*nullFieldNo], oil); }); + + BSONObj startKey, endKey; + bool startKeyInclusive, endKeyInclusive; + if (IndexBoundsBuilder::isSingleInterval( + isn->bounds, &startKey, &startKeyInclusive, &endKey, &endKeyInclusive)) { + return makeCountScan(startKey, startKeyInclusive, endKey, endKeyInclusive); + } + + return nullptr; + }; + + auto undefinedCsn = makeNullBoundsCountScan(undefinedPointOil); + + if (undefinedCsn) { + // If undefinedCsn is non-null, then we should also be able to successfully generate + // a count scan for the null interval case. + auto nullCsn = makeNullBoundsCountScan(nullPointOil); + tassert(5506500, "Invalid null bounds COUNT_SCAN", nullCsn); - // Since count scans return no data, they are always forward scans. Index scans, on the other - // hand, may need to scan the index in reverse order in order to obtain a sort. If the index - // scan direction is backwards, then we need to swap the start and end of the count scan bounds. - if (isn->direction < 0) { - startKey.swap(endKey); - std::swap(startKeyInclusive, endKeyInclusive); + auto csns = makeVector(std::move(undefinedCsn), std::move(nullCsn)); + + // Note that there is no need to deduplicate here because this optimization cannot + // be applied to multikey indexes. + auto orn = std::make_unique(); + orn->addChildren(std::move(csns)); + soln->setRoot(std::move(orn)); + + return true; + } + } + return false; } // Make the count node that we replace the fetch + ixscan with. - auto csn = std::make_unique(isn->index); - csn->startKey = startKey; - csn->startKeyInclusive = startKeyInclusive; - csn->endKey = endKey; - csn->endKeyInclusive = endKeyInclusive; + auto csn = makeCountScan(startKey, startKeyInclusive, endKey, endKeyInclusive); // Takes ownership of 'cn' and deletes the old root. soln->setRoot(std::move(csn)); return true; diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index aa05e986aed..4176823172e 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -129,11 +129,10 @@ const Interval kHashedNullInterval = IndexBoundsBuilder::makePointInterval(ExpressionMapping::hash(kNullElementObj.firstElement())); Interval makeUndefinedPointInterval(bool isHashed) { - return isHashed ? kHashedUndefinedInterval - : IndexBoundsBuilder::makePointInterval(kUndefinedElementObj); + return isHashed ? kHashedUndefinedInterval : IndexBoundsBuilder::kUndefinedPointInterval; } Interval makeNullPointInterval(bool isHashed) { - return isHashed ? kHashedNullInterval : IndexBoundsBuilder::makePointInterval(kNullElementObj); + return isHashed ? kHashedNullInterval : IndexBoundsBuilder::kNullPointInterval; } void makeNullEqualityBounds(const IndexEntry& index, @@ -501,6 +500,11 @@ void buildBoundsForQueryElementForGT(BSONElement dataElt, } // namespace +const Interval IndexBoundsBuilder::kUndefinedPointInterval = + IndexBoundsBuilder::makePointInterval(kUndefinedElementObj); +const Interval IndexBoundsBuilder::kNullPointInterval = + IndexBoundsBuilder::makePointInterval(kNullElementObj); + void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, const BSONElement& elt, const IndexEntry& index, @@ -1455,4 +1459,12 @@ bool IndexBoundsBuilder::isSingleInterval(const IndexBounds& bounds, } } +// static +bool IndexBoundsBuilder::isNullInterval(const OrderedIntervalList& oil) { + // Checks if the the intervals are [undefined, undefined] and [null, null]. + // Note: the order is always the same (see makeNullEqualityBounds()). + return 2 == oil.intervals.size() && oil.intervals[0].equals(kUndefinedPointInterval) && + oil.intervals[1].equals(kNullPointInterval); +} + } // namespace mongo diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h index 7f141600837..67205d64e7a 100644 --- a/src/mongo/db/query/index_bounds_builder.h +++ b/src/mongo/db/query/index_bounds_builder.h @@ -44,6 +44,9 @@ class CollatorInterface; */ class IndexBoundsBuilder { public: + static const Interval kUndefinedPointInterval; + static const Interval kNullPointInterval; + /** * Describes various degrees of precision with which predicates can be evaluated based * on the index bounds. @@ -208,6 +211,12 @@ public: BSONObj* endKey, bool* endKeyInclusive); + /** + * Returns 'true' if the ordered intervals 'oil' represent a strict null equality predicate. + * Returns 'false' otherwise. + */ + static bool isNullInterval(const OrderedIntervalList& oil); + /** * Appends the startKey and endKey of the given "all values" 'interval' (which is either * [MinKey, MaxKey] or [MaxKey, MinKey] interval) to the 'startBob' and 'endBob' respectively, diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp index 46762805cec..d7627235ef8 100644 --- a/src/mongo/db/query/planner_access.cpp +++ b/src/mongo/db/query/planner_access.cpp @@ -937,6 +937,62 @@ std::vector> QueryPlannerAccess::collapseEqui return collapsedScans; } +/** + * Returns true if this is a null query that can retrieve all the information it needs directly from + * the index, and so does not need a FETCH stage on top of it. Returns false otherwise. + */ +bool isCoveredNullQuery(const CanonicalQuery& query, + MatchExpression* root, + IndexTag* tag, + const vector& indices, + const QueryPlannerParams& params) { + // We are only interested in queries checking for an indexed field equalling null. + // This optimization can only be done when the index is not multikey, otherwise empty arrays + // in the collection will be treated as null/undefined by the index. Additionally, + // sparse indexes and hashed indexes should not use this optimization as they will require a + // FETCH stage with a filter. + if (indices[tag->index].multikey || indices[tag->index].sparse || + indices[tag->index].type == IndexType::INDEX_HASHED || + !ComparisonMatchExpressionBase::isEquality(root->matchType())) { + return false; + } + + // Check if the query is looking for null values. + const auto node = static_cast(root); + if (node->getData().type() != BSONType::jstNULL) { + return false; + } + + // If nothing is being projected, the query is fully covered without a fetch. + // This is trivially true for a count query. + if (params.options & QueryPlannerParams::Options::IS_COUNT) { + return true; + } + + // This optimization can only be used for find when the index covers the projection completely. + // However, if the indexed field is in the projection, the index may return an incorrect value + // for the field, since it does not distinguish between null and undefined. Hence, only find + // queries projecting _id are covered. + auto proj = query.getProj(); + if (!proj) { + return false; + } + + // We can cover projections on _id and generated fields and expressions depending only on _id. + // However, if the projection is an exclusion, requires match details, requires the full + // document, or requires metadata, we will still need a FETCH stage. + if (proj->type() == projection_ast::ProjectType::kInclusion && !proj->requiresMatchDetails() && + proj->metadataDeps().none() && !proj->requiresDocument()) { + auto projFields = proj->getRequiredFields(); + // Note that it is not possible to project onto dotted paths of _id here, since they may be + // null or missing, and the index cannot differentiate between the two cases, so we would + // still need a FETCH stage. + return projFields.size() == 1 && projFields[0] == "_id"; + } + + return false; +} + bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query, MatchExpression* root, bool inArrayOperator, @@ -944,7 +1000,7 @@ bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query, const QueryPlannerParams& params, std::vector>* out) { // Initialize the ScanBuildingState. - ScanBuildingState scanState(root, inArrayOperator, indices); + ScanBuildingState scanState(root, indices, inArrayOperator); while (scanState.curChild < root->numChildren()) { MatchExpression* child = root->getChild(scanState.curChild); @@ -978,6 +1034,11 @@ bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query, // If we're here, we now know that 'child' can use an index directly and the index is // over the child's field. + // We need to track if this is a covered null query so that we can have this information + // at hand when handling the filter on an indexed AND. + scanState.isCoveredNullQuery = + isCoveredNullQuery(query, child, scanState.ixtag, indices, params); + // If 'child' is a NOT, then the tag we're interested in is on the NOT's // child node. if (MatchExpression::NOT == child->matchType()) { @@ -1437,7 +1498,11 @@ std::unique_ptr QueryPlannerAccess::_buildIndexedDataAccess( // superset of documents that satisfy the predicate, and we must check the // predicate. - if (tightness == IndexBoundsBuilder::EXACT) { + // We may also be able to avoid adding an extra fetch stage even though the bounds are + // inexact because the query is counting null values on an indexed field without + // projecting that field. + if (tightness == IndexBoundsBuilder::EXACT || + isCoveredNullQuery(query, root, tag, indices, params)) { return soln; } else if (tightness == IndexBoundsBuilder::INEXACT_COVERED && !indices[tag->index].multikey) { @@ -1584,8 +1649,13 @@ void QueryPlannerAccess::handleFilterAnd(ScanBuildingState* scanState) { // should always be affixed as a filter. We keep 'curChild' in the $and // for affixing later. ++scanState->curChild; - } else if (scanState->tightness == IndexBoundsBuilder::EXACT) { + } else if (scanState->tightness == IndexBoundsBuilder::EXACT || scanState->isCoveredNullQuery) { + // The tightness of the bounds is exact or we are dealing with a covered null query. + // Either way, we want to remove this child so that when control returns to handleIndexedAnd + // we know that we don't need it to create a FETCH stage. root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild); + // Since we are not attaching this child to a filter or tracking it anywhere else, it is + // safe to delete it here. delete child; } else if (scanState->tightness == IndexBoundsBuilder::INEXACT_COVERED && (INDEX_TEXT == index.type || !index.multikey)) { diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h index 3c9b03ff4d0..ff01a3fa79d 100644 --- a/src/mongo/db/query/planner_access.h +++ b/src/mongo/db/query/planner_access.h @@ -138,10 +138,12 @@ private: */ struct ScanBuildingState { ScanBuildingState(MatchExpression* theRoot, + const std::vector& indexList, bool inArrayOp, - const std::vector& indexList) + bool isCoveredNull = false) : root(theRoot), inArrayOperator(inArrayOp), + isCoveredNullQuery(isCoveredNull), indices(indexList), currentScan(nullptr), curChild(0), @@ -174,6 +176,9 @@ private: // Are we inside an array operator such as $elemMatch or $all? bool inArrayOperator; + // Is this a covered null query? + bool isCoveredNullQuery; + // A list of relevant indices which 'root' may be tagged to use. const std::vector& indices; diff --git a/src/mongo/db/query/query_planner_index_test.cpp b/src/mongo/db/query/query_planner_index_test.cpp index a0a725910c9..b6232696491 100644 --- a/src/mongo/db/query/query_planner_index_test.cpp +++ b/src/mongo/db/query/query_planner_index_test.cpp @@ -57,6 +57,50 @@ TEST_F(QueryPlannerTest, PlannerAddsFetchToIxscanForCountWhenFetchFilterNonempty "{pattern: {x: 1}, bounds: {x: [[5,5,true,true]]}}}}}"); } +TEST_F(QueryPlannerTest, PlannerUsesCoveredIxscanForCountWhenIndexSatisfiesNullQuery) { + params.options = QueryPlannerParams::IS_COUNT; + addIndex(BSON("x" << 1)); + runQuery(fromjson("{x: null}")); + ASSERT_EQUALS(getNumSolutions(), 1U); + assertSolutionExists( + "{ixscan: {pattern: {x: 1}, bounds: " + "{x: [[undefined,undefined,true,true],[null,null,true,true]]}}}"); +} + +TEST_F(QueryPlannerTest, PlannerAddsFetchForCountWhenMultikeyIndexSatisfiesNullQuery) { + params.options = QueryPlannerParams::IS_COUNT; + addIndex(BSON("x" << 1), true); + runQuery(fromjson("{x: null}")); + ASSERT_EQUALS(getNumSolutions(), 1U); + assertSolutionExists( + "{fetch: {filter: {x: null}, node: {ixscan: " + "{pattern: {x: 1}, bounds: " + "{x: [[undefined,undefined,true,true],[null,null,true,true]]}}}}}"); +} + +TEST_F(QueryPlannerTest, PlannerUsesCoveredIxscanFindWhenIndexSatisfiesNullQuery) { + params.options &= ~QueryPlannerParams::INCLUDE_COLLSCAN; + addIndex(fromjson("{x: 1, _id: 1}")); + runQuerySortProj(fromjson("{x: null}}"), BSONObj(), fromjson("{_id: 1}}")); + ASSERT_EQUALS(getNumSolutions(), 1U); + assertSolutionExists( + "{proj: {spec: {_id: 1}, node:" + "{ixscan: {pattern: {x: 1, _id: 1}, bounds: " + "{x: [[undefined,undefined,true,true],[null,null,true,true]]}}}}}"); +} + +TEST_F(QueryPlannerTest, PlannerAddsFetchForFindWhenMultikeyIndexSatisfiesNullQuery) { + params.options &= ~QueryPlannerParams::INCLUDE_COLLSCAN; + addIndex(fromjson("{x: 1, _id: 1}"), true); + runQuerySortProj(fromjson("{x: null}}"), BSONObj(), fromjson("{_id: 1}}")); + ASSERT_EQUALS(getNumSolutions(), 1U); + assertSolutionExists( + "{proj: {spec: {_id: 1}, node:" + "{fetch: {filter: {x: null}, node: {ixscan: " + "{pattern: {x: 1, _id: 1}, bounds: " + "{x: [[undefined,undefined,true,true],[null,null,true,true]]}}}}}}}"); +} + // // Sparse indices, SERVER-8067 // Each index in this block of tests is sparse. -- cgit v1.2.1