summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlya Berciu <alyacarina@gmail.com>2021-02-24 16:44:23 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-03-18 17:37:35 +0000
commit54a41757d77002b015c7bceca87c59f35e5a564f (patch)
tree8c29ab5e645e4bcfe0f3bac00844f760388cf213
parenteee4362074775ebd142cfd671dcca312ddc31fe2 (diff)
downloadmongo-54a41757d77002b015c7bceca87c59f35e5a564f.tar.gz
SERVER-55065: Cover null queries with index where possible
-rw-r--r--jstests/core/cover_null_queries.js338
-rw-r--r--src/mongo/db/exec/count_scan.cpp2
-rw-r--r--src/mongo/db/query/get_executor.cpp101
-rw-r--r--src/mongo/db/query/index_bounds_builder.cpp18
-rw-r--r--src/mongo/db/query/index_bounds_builder.h9
-rw-r--r--src/mongo/db/query/planner_access.cpp76
-rw-r--r--src/mongo/db/query/planner_access.h7
-rw-r--r--src/mongo/db/query/query_planner_index_test.cpp44
8 files changed, 575 insertions, 20 deletions
diff --git a/jstests/core/cover_null_queries.js b/jstests/core/cover_null_queries.js
new file mode 100644
index 00000000000..cfa04925b27
--- /dev/null
+++ b/jstests/core/cover_null_queries.js
@@ -0,0 +1,338 @@
+/**
+ * Test to verify that null queries can be fully covered by an index.
+ * @tags: [assumes_unsharded_collection, requires_fcv_49, requires_non_retryable_writes]
+ */
+(function() {
+"use strict";
+
+load("jstests/aggregation/extras/utils.js"); // For arrayEq().
+load("jstests/libs/analyze_plan.js"); // For getAggPlanStages() and getPlanStages().
+
+const coll = db.cover_null_queries;
+coll.drop();
+
+assert.commandWorked(coll.insertMany([
+ {_id: 1, a: 1, b: 1},
+ {_id: 2, a: 1, b: null},
+ {_id: 3, a: null, b: 1},
+ {_id: 4, a: null, b: null},
+ {_id: 5, a: 2},
+ {_id: 6, b: 2},
+ {_id: 7},
+]));
+
+/**
+ * Validates that the explain() of command 'cmdObj' has the stages in 'expectedStages'.
+ *
+ * The field 'expectedStages' should be specified as an object with keys matching the stages
+ * expected in the explain output and values indicating how many times each stage should be expected
+ * to appear in the output (useful to verify if there are two COUNT_SCANS or that there are 0
+ * IX_SCANS, for example).
+ *
+ * The field 'isAgg' is a boolean indicating whether or not the command is an aggregation.
+ */
+function validateStages({cmdObj, expectedStages, isAgg}) {
+ const explainObj = assert.commandWorked(coll.runCommand({explain: cmdObj}));
+ for (const [expectedStage, count] of Object.entries(expectedStages)) {
+ const planStages = isAgg
+ ? getAggPlanStages(explainObj, expectedStage, /* useQueryPlannerSection */ true)
+ : getPlanStages(explainObj, expectedStage);
+ assert.eq(planStages.length, count, planStages);
+ if (count > 0) {
+ for (const planStage of planStages) {
+ assert.eq(planStage.stage, expectedStage, planStage);
+ }
+ }
+ }
+}
+
+/**
+ * Runs find command with the given 'filter' and 'projection' and validates that the output returned
+ * matches 'expectedOutput'. Also runs explain() command on the same find command and validates that
+ * all the 'expectedStages' are present in the plan returned.
+ */
+function validateFindCmdOutputAndPlan({filter, projection, expectedStages, expectedOutput}) {
+ const cmdObj = {find: coll.getName(), filter: filter, projection: projection};
+ if (expectedOutput) {
+ const res = assert.commandWorked(coll.runCommand(cmdObj));
+ const ouputArray = new DBCommandCursor(coll.getDB(), res).toArray();
+ assert(arrayEq(expectedOutput, ouputArray), ouputArray);
+ }
+ validateStages({cmdObj, expectedStages});
+}
+
+/**
+ * Runs count command with the 'filter' and validates that the output returned matches
+ * 'expectedOutput'. Also runs explain() command and validates that all the 'expectedStages'
+ * are present in the plan returned.
+ */
+function validateSimpleCountCmdOutputAndPlan({filter, expectedStages, expectedCount}) {
+ const cmdObj = {count: coll.getName(), query: filter};
+ const res = assert.commandWorked(coll.runCommand(cmdObj));
+ assert.eq(res.n, expectedCount);
+ validateStages({cmdObj, expectedStages});
+}
+
+/**
+ * Runs an aggregation with a $count stage with the 'filter' applied to the $match stage and
+ * validates that the count returned matches 'expectedCount'. Also runs explain() command on the
+ * and validates that all the 'expectedStages' are present in the plan returned.
+ */
+function validateCountAggCmdOutputAndPlan({filter, expectedStages, expectedCount, pipeline}) {
+ const cmdObj = {
+ aggregate: coll.getName(),
+ pipeline: pipeline || [{$match: filter}, {$count: "count"}],
+ cursor: {},
+ };
+ const cmdRes = assert.commandWorked(coll.runCommand(cmdObj));
+ const countRes = cmdRes.cursor.firstBatch;
+ assert.eq(countRes.length, 1, cmdRes);
+ assert.eq(countRes[0].count, expectedCount, countRes);
+ validateStages({cmdObj, expectedStages, isAgg: true});
+}
+
+assert.commandWorked(coll.createIndex({a: 1, _id: 1}));
+
+// Verify count({a: null}) can be covered by an index. In the simplest case we can use two count
+// scans joined by an OR to evaluate it.
+validateSimpleCountCmdOutputAndPlan({
+ filter: {a: null},
+ expectedCount: 4,
+ expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0},
+});
+
+// Verify $count stage in aggregation matching {a: null} yields the same plan.
+validateCountAggCmdOutputAndPlan({
+ filter: {a: null},
+ expectedCount: 4,
+ expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0},
+});
+
+// Verify find({a: null}, {_id: 1}) can be covered by an index.
+validateFindCmdOutputAndPlan({
+ filter: {a: null},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1},
+});
+
+// Verify that a more complex projection that only relies on the _id field does not need a FETCH.
+validateFindCmdOutputAndPlan({
+ filter: {a: null},
+ projection: {_id: 1, incr_id: {$add: [1, "$_id"]}},
+ expectedOutput:
+ [{_id: 3, incr_id: 4}, {_id: 4, incr_id: 5}, {_id: 6, incr_id: 7}, {_id: 7, incr_id: 8}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_DEFAULT": 1},
+});
+
+// Verify that a more complex projection that computes a new field based on _id but excludes the _id
+// field does not require a fetch stage.
+validateFindCmdOutputAndPlan({
+ filter: {a: null},
+ projection: {_id: 0, incr_id: {$add: [1, "$_id"]}},
+ expectedOutput: [{incr_id: 4}, {incr_id: 5}, {incr_id: 7}, {incr_id: 8}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_DEFAULT": 1},
+});
+
+// Verify that a more complex projection that relies on any non-_id field does need a FETCH.
+validateFindCmdOutputAndPlan({
+ filter: {a: null},
+ projection: {_id: 1, incr_id: {$add: ["$a", "$_id"]}},
+ expectedOutput: [
+ {_id: 3, incr_id: null},
+ {_id: 4, incr_id: null},
+ {_id: 6, incr_id: null},
+ {_id: 7, incr_id: null}
+ ],
+ expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_DEFAULT": 1},
+});
+
+// Verify that an exclusion projection does need a FETCH.
+validateFindCmdOutputAndPlan({
+ filter: {a: null},
+ projection: {a: 0, b: 0},
+ expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_DEFAULT": 1},
+});
+
+// Verify find({a: null}, {_id: 1, b: 1}) is not covered by an index so we still have a FETCH stage.
+validateFindCmdOutputAndPlan({
+ filter: {a: null},
+ projection: {_id: 1, b: 1},
+ expectedOutput: [{_id: 3, b: 1}, {_id: 4, b: null}, {_id: 6, b: 2}, {_id: 7}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+
+// Verify find({a: null}, {a: 1}) still has a FETCH stage because the index alone cannot determine
+// if the value of field a is null, undefined, or missing.
+validateFindCmdOutputAndPlan({
+ filter: {a: null},
+ projection: {a: 1},
+ expectedOutput: [{_id: 3, a: null}, {_id: 4, a: null}, {_id: 6}, {_id: 7}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+
+// For exclusion projects we always need a FETCH stage.
+validateFindCmdOutputAndPlan({
+ filter: {a: null},
+ projection: {a: 1, _id: 0},
+ expectedOutput: [{a: null}, {a: null}, {}, {}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+
+// Verify that if the index is multikey, this optimization cannot be applied, as the index alone
+// cannot differentiate between null and [].
+assert.commandWorked(coll.insertOne({_id: 8, a: []}));
+validateSimpleCountCmdOutputAndPlan({
+ filter: {a: null, _id: 3},
+ expectedCount: 1,
+ expectedStages: {"FETCH": 1, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0}
+});
+validateCountAggCmdOutputAndPlan({
+ filter: {a: null, _id: 3},
+ expectedCount: 1,
+ expectedStages: {"FETCH": 1, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0},
+});
+validateFindCmdOutputAndPlan({
+ filter: {a: null, _id: 3},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 3}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+assert.commandWorked(coll.deleteOne({_id: 8}));
+
+// Test case when query is fully covered but we still need to fetch to correctly project a field.
+assert.commandWorked(coll.dropIndexes());
+assert.commandWorked(coll.createIndex({a: 1, _id: 1}));
+assert.commandWorked(coll.createIndex({a: 1, b: 1, _id: 1}));
+validateFindCmdOutputAndPlan({
+ filter: {a: null},
+ projection: {_id: 1, b: 1},
+ expectedOutput: [
+ {_id: 3, b: 1},
+ {_id: 4, b: null},
+ {_id: 6, b: 2},
+ {_id: 7},
+ ],
+ expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+
+// Test case when query is fully covered but predicate is not a single interval.
+assert.commandWorked(coll.dropIndexes());
+assert.commandWorked(coll.createIndex({a: 1, b: 1, _id: 1}));
+validateFindCmdOutputAndPlan({
+ filter: {a: {$in: [1, 2, 3]}, b: null},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 2}, {_id: 5}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1},
+});
+// Note that we can't use a COUNT_SCAN here because we have a complex interval.
+validateSimpleCountCmdOutputAndPlan({
+ filter: {a: {$in: [1, 2, 3]}, b: null},
+ expectedCount: 2,
+ expectedStages: {"IXSCAN": 1, "FETCH": 0},
+});
+validateCountAggCmdOutputAndPlan({
+ filter: {a: {$in: [1, 2, 3]}, b: null},
+ expectedCount: 2,
+ expectedStages: {"IXSCAN": 1, "FETCH": 0},
+});
+
+validateFindCmdOutputAndPlan({
+ filter: {a: 1, b: null},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 2}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+ filter: {a: 1, b: null},
+ expectedCount: 1,
+ expectedStages: {"OR": 1, "COUNT_SCAN": 2, "FETCH": 0},
+});
+validateCountAggCmdOutputAndPlan({
+ filter: {a: 1, b: null},
+ expectedCount: 1,
+ expectedStages: {"OR": 1, "COUNT_SCAN": 2, "FETCH": 0},
+});
+
+// Test case when counting nulls where documents are sorted in the opposite direction as the index.
+assert.commandWorked(coll.dropIndexes());
+assert.commandWorked(coll.createIndex({a: -1, b: -1}));
+validateCountAggCmdOutputAndPlan({
+ expectedCount: 2,
+ expectedStages: {"OR": 1, "COUNT_SCAN": 2, "FETCH": 0},
+ pipeline: /* Sort by field a in the opposite direction of the index. */
+ [{$match: {a: null, b: {$gt: 0}}}, {$sort: {a: 1}}, {$count: "count"}],
+});
+
+// Test case when query is fully covered, predicate is not a single interval, and the index does not
+// include the _id field. A find projection in this case will not be covered, but any count should
+// be covered.
+assert.commandWorked(coll.dropIndexes());
+assert.commandWorked(coll.createIndex({a: 1, b: 1}));
+validateFindCmdOutputAndPlan({
+ filter: {a: {$in: [1, 2, 3]}, b: null},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 2}, {_id: 5}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+// Note that we can't use a COUNT_SCAN here because we have a complex interval.
+validateSimpleCountCmdOutputAndPlan({
+ filter: {a: {$in: [1, 2, 3]}, b: null},
+ expectedCount: 2,
+ expectedStages: {"IXSCAN": 1, "FETCH": 0},
+});
+validateCountAggCmdOutputAndPlan({
+ filter: {a: {$in: [1, 2, 3]}, b: null},
+ expectedCount: 2,
+ expectedStages: {"IXSCAN": 1, "FETCH": 0},
+});
+
+// Test index intersection plan.
+assert.commandWorked(coll.dropIndexes());
+assert.commandWorked(coll.createIndex({a: 1, _id: 1}));
+assert.commandWorked(coll.createIndex({b: 1, _id: 1}));
+validateFindCmdOutputAndPlan({
+ filter: {a: null, b: null},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 4}, {_id: 7}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+validateFindCmdOutputAndPlan({
+ filter: {a: null, b: 1},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 3}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+
+// Verify the case where id field is accessed along a dotted path.
+// We still need a FETCH in this case because the index cannot differentiate between null and
+// missing values within _id, e.g. {_id: {x: null}} vs. {_id: {}} would both be returned as
+// {_id: {x: null}} by the index.
+assert.commandWorked(coll.dropIndexes());
+assert.commandWorked(coll.deleteMany({}));
+assert.commandWorked(coll.createIndex({a: 1, "_id.x": 1}));
+assert.commandWorked(coll.insertMany([
+ {a: null, _id: {x: 1}},
+ {a: null, _id: {x: 1, y: 1}},
+ {a: null, _id: {y: 1}},
+ {_id: {x: 1, y: 2}},
+ {a: "not null", _id: {x: 3}},
+]));
+validateFindCmdOutputAndPlan({
+ filter: {a: null},
+ projection: {"_id.x": 1},
+ expectedOutput: [{_id: {x: 1}}, {_id: {x: 1}}, {_id: {}}, {_id: {x: 1}}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_DEFAULT": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+ filter: {a: null},
+ expectedCount: 4,
+ expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0},
+});
+validateCountAggCmdOutputAndPlan({
+ filter: {a: null},
+ expectedCount: 4,
+ expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0},
+});
+})();
diff --git a/src/mongo/db/exec/count_scan.cpp b/src/mongo/db/exec/count_scan.cpp
index 8174e35ce2e..b37ac05a2d6 100644
--- a/src/mongo/db/exec/count_scan.cpp
+++ b/src/mongo/db/exec/count_scan.cpp
@@ -151,6 +151,8 @@ PlanStage::StageState CountScan::doWork(WorkingSetID* out) {
}
WorkingSetID id = _workingSet->allocate();
+ WorkingSetMember* member = _workingSet->get(id);
+ member->recordId = entry->loc;
_workingSet->transitionToRecordIdAndObj(id);
*out = id;
return PlanStage::ADVANCED;
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 92874117844..a18f9e61d21 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -1577,6 +1577,25 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorUpda
namespace {
/**
+ * If the given 'bounds' contains a single null interval return its position, otherwise return
+ * boost::none.
+ */
+boost::optional<size_t> boundsHasExactlyOneNullInterval(const IndexBounds& bounds) {
+ boost::optional<size_t> nullFieldNo;
+ for (size_t fieldNo = 0; fieldNo < bounds.fields.size(); ++fieldNo) {
+ const OrderedIntervalList& oil = bounds.fields[fieldNo];
+ if (IndexBoundsBuilder::isNullInterval(oil)) {
+ // Return boost::none if we have multiple null intervals.
+ if (nullFieldNo) {
+ return boost::none;
+ }
+ nullFieldNo = fieldNo;
+ }
+ }
+ return nullFieldNo;
+}
+
+/**
* Returns 'true' if the provided solution 'soln' can be rewritten to use
* a fast counting stage. Mutates the tree in 'soln->root'.
*
@@ -1617,25 +1636,81 @@ bool turnIxscanIntoCount(QuerySolution* soln) {
BSONObj endKey;
bool endKeyInclusive;
+ auto makeCountScan = [&isn](BSONObj& csnStartKey,
+ bool startKeyInclusive,
+ BSONObj& csnEndKey,
+ bool endKeyInclusive) {
+ // Since count scans return no data, they are always forward scans. Index scans, on the
+ // other hand, may need to scan the index in reverse order in order to obtain a sort. If the
+ // index scan direction is backwards, then we need to swap the start and end of the count
+ // scan bounds.
+ if (isn->direction < 0) {
+ csnStartKey.swap(csnEndKey);
+ std::swap(startKeyInclusive, endKeyInclusive);
+ }
+
+ auto csn = std::make_unique<CountScanNode>(isn->index);
+ csn->startKey = csnStartKey;
+ csn->startKeyInclusive = startKeyInclusive;
+ csn->endKey = csnEndKey;
+ csn->endKeyInclusive = endKeyInclusive;
+ return csn;
+ };
+
if (!IndexBoundsBuilder::isSingleInterval(
isn->bounds, &startKey, &startKeyInclusive, &endKey, &endKeyInclusive)) {
- return false;
- }
+ // If we have exactly one null interval, we should split the bounds and try to construct
+ // two COUNT_SCAN stages joined by an OR stage. If we had multiple null intervals, we would
+ // need 2^N count scans for N intervals, meaning this would quickly explode to a
+ // point where it would just be more efficient to use a single index scan. Consequently, we
+ // draw the line at one null interval.
+ if (auto nullFieldNo = boundsHasExactlyOneNullInterval(isn->bounds)) {
+ OrderedIntervalList undefinedPointOil, nullPointOil;
+ undefinedPointOil.intervals.push_back(IndexBoundsBuilder::kUndefinedPointInterval);
+ nullPointOil.intervals.push_back(IndexBoundsBuilder::kNullPointInterval);
+
+ tassert(5506501,
+ "The index of the null interval is invalid",
+ *nullFieldNo < isn->bounds.fields.size());
+ auto makeNullBoundsCountScan =
+ [&](OrderedIntervalList& oil) -> std::unique_ptr<QuerySolutionNode> {
+ std::swap(isn->bounds.fields[*nullFieldNo], oil);
+ ON_BLOCK_EXIT([&] { std::swap(isn->bounds.fields[*nullFieldNo], oil); });
+
+ BSONObj startKey, endKey;
+ bool startKeyInclusive, endKeyInclusive;
+ if (IndexBoundsBuilder::isSingleInterval(
+ isn->bounds, &startKey, &startKeyInclusive, &endKey, &endKeyInclusive)) {
+ return makeCountScan(startKey, startKeyInclusive, endKey, endKeyInclusive);
+ }
+
+ return nullptr;
+ };
+
+ auto undefinedCsn = makeNullBoundsCountScan(undefinedPointOil);
+
+ if (undefinedCsn) {
+ // If undefinedCsn is non-null, then we should also be able to successfully generate
+ // a count scan for the null interval case.
+ auto nullCsn = makeNullBoundsCountScan(nullPointOil);
+ tassert(5506500, "Invalid null bounds COUNT_SCAN", nullCsn);
- // Since count scans return no data, they are always forward scans. Index scans, on the other
- // hand, may need to scan the index in reverse order in order to obtain a sort. If the index
- // scan direction is backwards, then we need to swap the start and end of the count scan bounds.
- if (isn->direction < 0) {
- startKey.swap(endKey);
- std::swap(startKeyInclusive, endKeyInclusive);
+ auto csns = makeVector(std::move(undefinedCsn), std::move(nullCsn));
+
+ // Note that there is no need to deduplicate here because this optimization cannot
+ // be applied to multikey indexes.
+ auto orn = std::make_unique<OrNode>();
+ orn->addChildren(std::move(csns));
+ soln->setRoot(std::move(orn));
+
+ return true;
+ }
+ }
+ return false;
}
// Make the count node that we replace the fetch + ixscan with.
- auto csn = std::make_unique<CountScanNode>(isn->index);
- csn->startKey = startKey;
- csn->startKeyInclusive = startKeyInclusive;
- csn->endKey = endKey;
- csn->endKeyInclusive = endKeyInclusive;
+ auto csn = makeCountScan(startKey, startKeyInclusive, endKey, endKeyInclusive);
// Takes ownership of 'cn' and deletes the old root.
soln->setRoot(std::move(csn));
return true;
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index aa05e986aed..4176823172e 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -129,11 +129,10 @@ const Interval kHashedNullInterval =
IndexBoundsBuilder::makePointInterval(ExpressionMapping::hash(kNullElementObj.firstElement()));
Interval makeUndefinedPointInterval(bool isHashed) {
- return isHashed ? kHashedUndefinedInterval
- : IndexBoundsBuilder::makePointInterval(kUndefinedElementObj);
+ return isHashed ? kHashedUndefinedInterval : IndexBoundsBuilder::kUndefinedPointInterval;
}
Interval makeNullPointInterval(bool isHashed) {
- return isHashed ? kHashedNullInterval : IndexBoundsBuilder::makePointInterval(kNullElementObj);
+ return isHashed ? kHashedNullInterval : IndexBoundsBuilder::kNullPointInterval;
}
void makeNullEqualityBounds(const IndexEntry& index,
@@ -501,6 +500,11 @@ void buildBoundsForQueryElementForGT(BSONElement dataElt,
} // namespace
+const Interval IndexBoundsBuilder::kUndefinedPointInterval =
+ IndexBoundsBuilder::makePointInterval(kUndefinedElementObj);
+const Interval IndexBoundsBuilder::kNullPointInterval =
+ IndexBoundsBuilder::makePointInterval(kNullElementObj);
+
void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr,
const BSONElement& elt,
const IndexEntry& index,
@@ -1455,4 +1459,12 @@ bool IndexBoundsBuilder::isSingleInterval(const IndexBounds& bounds,
}
}
+// static
+bool IndexBoundsBuilder::isNullInterval(const OrderedIntervalList& oil) {
+ // Checks if the the intervals are [undefined, undefined] and [null, null].
+ // Note: the order is always the same (see makeNullEqualityBounds()).
+ return 2 == oil.intervals.size() && oil.intervals[0].equals(kUndefinedPointInterval) &&
+ oil.intervals[1].equals(kNullPointInterval);
+}
+
} // namespace mongo
diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h
index 7f141600837..67205d64e7a 100644
--- a/src/mongo/db/query/index_bounds_builder.h
+++ b/src/mongo/db/query/index_bounds_builder.h
@@ -44,6 +44,9 @@ class CollatorInterface;
*/
class IndexBoundsBuilder {
public:
+ static const Interval kUndefinedPointInterval;
+ static const Interval kNullPointInterval;
+
/**
* Describes various degrees of precision with which predicates can be evaluated based
* on the index bounds.
@@ -209,6 +212,12 @@ public:
bool* endKeyInclusive);
/**
+ * Returns 'true' if the ordered intervals 'oil' represent a strict null equality predicate.
+ * Returns 'false' otherwise.
+ */
+ static bool isNullInterval(const OrderedIntervalList& oil);
+
+ /**
* Appends the startKey and endKey of the given "all values" 'interval' (which is either
* [MinKey, MaxKey] or [MaxKey, MinKey] interval) to the 'startBob' and 'endBob' respectively,
* handling inclusivity of each bound through the relevant '*KeyInclusive' parameter.
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index 46762805cec..d7627235ef8 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -937,6 +937,62 @@ std::vector<std::unique_ptr<QuerySolutionNode>> QueryPlannerAccess::collapseEqui
return collapsedScans;
}
+/**
+ * Returns true if this is a null query that can retrieve all the information it needs directly from
+ * the index, and so does not need a FETCH stage on top of it. Returns false otherwise.
+ */
+bool isCoveredNullQuery(const CanonicalQuery& query,
+ MatchExpression* root,
+ IndexTag* tag,
+ const vector<IndexEntry>& indices,
+ const QueryPlannerParams& params) {
+ // We are only interested in queries checking for an indexed field equalling null.
+ // This optimization can only be done when the index is not multikey, otherwise empty arrays
+ // in the collection will be treated as null/undefined by the index. Additionally,
+ // sparse indexes and hashed indexes should not use this optimization as they will require a
+ // FETCH stage with a filter.
+ if (indices[tag->index].multikey || indices[tag->index].sparse ||
+ indices[tag->index].type == IndexType::INDEX_HASHED ||
+ !ComparisonMatchExpressionBase::isEquality(root->matchType())) {
+ return false;
+ }
+
+ // Check if the query is looking for null values.
+ const auto node = static_cast<const ComparisonMatchExpressionBase*>(root);
+ if (node->getData().type() != BSONType::jstNULL) {
+ return false;
+ }
+
+ // If nothing is being projected, the query is fully covered without a fetch.
+ // This is trivially true for a count query.
+ if (params.options & QueryPlannerParams::Options::IS_COUNT) {
+ return true;
+ }
+
+ // This optimization can only be used for find when the index covers the projection completely.
+ // However, if the indexed field is in the projection, the index may return an incorrect value
+ // for the field, since it does not distinguish between null and undefined. Hence, only find
+ // queries projecting _id are covered.
+ auto proj = query.getProj();
+ if (!proj) {
+ return false;
+ }
+
+ // We can cover projections on _id and generated fields and expressions depending only on _id.
+ // However, if the projection is an exclusion, requires match details, requires the full
+ // document, or requires metadata, we will still need a FETCH stage.
+ if (proj->type() == projection_ast::ProjectType::kInclusion && !proj->requiresMatchDetails() &&
+ proj->metadataDeps().none() && !proj->requiresDocument()) {
+ auto projFields = proj->getRequiredFields();
+ // Note that it is not possible to project onto dotted paths of _id here, since they may be
+ // null or missing, and the index cannot differentiate between the two cases, so we would
+ // still need a FETCH stage.
+ return projFields.size() == 1 && projFields[0] == "_id";
+ }
+
+ return false;
+}
+
bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
MatchExpression* root,
bool inArrayOperator,
@@ -944,7 +1000,7 @@ bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
const QueryPlannerParams& params,
std::vector<std::unique_ptr<QuerySolutionNode>>* out) {
// Initialize the ScanBuildingState.
- ScanBuildingState scanState(root, inArrayOperator, indices);
+ ScanBuildingState scanState(root, indices, inArrayOperator);
while (scanState.curChild < root->numChildren()) {
MatchExpression* child = root->getChild(scanState.curChild);
@@ -978,6 +1034,11 @@ bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
// If we're here, we now know that 'child' can use an index directly and the index is
// over the child's field.
+ // We need to track if this is a covered null query so that we can have this information
+ // at hand when handling the filter on an indexed AND.
+ scanState.isCoveredNullQuery =
+ isCoveredNullQuery(query, child, scanState.ixtag, indices, params);
+
// If 'child' is a NOT, then the tag we're interested in is on the NOT's
// child node.
if (MatchExpression::NOT == child->matchType()) {
@@ -1437,7 +1498,11 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::_buildIndexedDataAccess(
// superset of documents that satisfy the predicate, and we must check the
// predicate.
- if (tightness == IndexBoundsBuilder::EXACT) {
+ // We may also be able to avoid adding an extra fetch stage even though the bounds are
+ // inexact because the query is counting null values on an indexed field without
+ // projecting that field.
+ if (tightness == IndexBoundsBuilder::EXACT ||
+ isCoveredNullQuery(query, root, tag, indices, params)) {
return soln;
} else if (tightness == IndexBoundsBuilder::INEXACT_COVERED &&
!indices[tag->index].multikey) {
@@ -1584,8 +1649,13 @@ void QueryPlannerAccess::handleFilterAnd(ScanBuildingState* scanState) {
// should always be affixed as a filter. We keep 'curChild' in the $and
// for affixing later.
++scanState->curChild;
- } else if (scanState->tightness == IndexBoundsBuilder::EXACT) {
+ } else if (scanState->tightness == IndexBoundsBuilder::EXACT || scanState->isCoveredNullQuery) {
+ // The tightness of the bounds is exact or we are dealing with a covered null query.
+ // Either way, we want to remove this child so that when control returns to handleIndexedAnd
+ // we know that we don't need it to create a FETCH stage.
root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild);
+ // Since we are not attaching this child to a filter or tracking it anywhere else, it is
+ // safe to delete it here.
delete child;
} else if (scanState->tightness == IndexBoundsBuilder::INEXACT_COVERED &&
(INDEX_TEXT == index.type || !index.multikey)) {
diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h
index 3c9b03ff4d0..ff01a3fa79d 100644
--- a/src/mongo/db/query/planner_access.h
+++ b/src/mongo/db/query/planner_access.h
@@ -138,10 +138,12 @@ private:
*/
struct ScanBuildingState {
ScanBuildingState(MatchExpression* theRoot,
+ const std::vector<IndexEntry>& indexList,
bool inArrayOp,
- const std::vector<IndexEntry>& indexList)
+ bool isCoveredNull = false)
: root(theRoot),
inArrayOperator(inArrayOp),
+ isCoveredNullQuery(isCoveredNull),
indices(indexList),
currentScan(nullptr),
curChild(0),
@@ -174,6 +176,9 @@ private:
// Are we inside an array operator such as $elemMatch or $all?
bool inArrayOperator;
+ // Is this a covered null query?
+ bool isCoveredNullQuery;
+
// A list of relevant indices which 'root' may be tagged to use.
const std::vector<IndexEntry>& indices;
diff --git a/src/mongo/db/query/query_planner_index_test.cpp b/src/mongo/db/query/query_planner_index_test.cpp
index a0a725910c9..b6232696491 100644
--- a/src/mongo/db/query/query_planner_index_test.cpp
+++ b/src/mongo/db/query/query_planner_index_test.cpp
@@ -57,6 +57,50 @@ TEST_F(QueryPlannerTest, PlannerAddsFetchToIxscanForCountWhenFetchFilterNonempty
"{pattern: {x: 1}, bounds: {x: [[5,5,true,true]]}}}}}");
}
+TEST_F(QueryPlannerTest, PlannerUsesCoveredIxscanForCountWhenIndexSatisfiesNullQuery) {
+ params.options = QueryPlannerParams::IS_COUNT;
+ addIndex(BSON("x" << 1));
+ runQuery(fromjson("{x: null}"));
+ ASSERT_EQUALS(getNumSolutions(), 1U);
+ assertSolutionExists(
+ "{ixscan: {pattern: {x: 1}, bounds: "
+ "{x: [[undefined,undefined,true,true],[null,null,true,true]]}}}");
+}
+
+TEST_F(QueryPlannerTest, PlannerAddsFetchForCountWhenMultikeyIndexSatisfiesNullQuery) {
+ params.options = QueryPlannerParams::IS_COUNT;
+ addIndex(BSON("x" << 1), true);
+ runQuery(fromjson("{x: null}"));
+ ASSERT_EQUALS(getNumSolutions(), 1U);
+ assertSolutionExists(
+ "{fetch: {filter: {x: null}, node: {ixscan: "
+ "{pattern: {x: 1}, bounds: "
+ "{x: [[undefined,undefined,true,true],[null,null,true,true]]}}}}}");
+}
+
+TEST_F(QueryPlannerTest, PlannerUsesCoveredIxscanFindWhenIndexSatisfiesNullQuery) {
+ params.options &= ~QueryPlannerParams::INCLUDE_COLLSCAN;
+ addIndex(fromjson("{x: 1, _id: 1}"));
+ runQuerySortProj(fromjson("{x: null}}"), BSONObj(), fromjson("{_id: 1}}"));
+ ASSERT_EQUALS(getNumSolutions(), 1U);
+ assertSolutionExists(
+ "{proj: {spec: {_id: 1}, node:"
+ "{ixscan: {pattern: {x: 1, _id: 1}, bounds: "
+ "{x: [[undefined,undefined,true,true],[null,null,true,true]]}}}}}");
+}
+
+TEST_F(QueryPlannerTest, PlannerAddsFetchForFindWhenMultikeyIndexSatisfiesNullQuery) {
+ params.options &= ~QueryPlannerParams::INCLUDE_COLLSCAN;
+ addIndex(fromjson("{x: 1, _id: 1}"), true);
+ runQuerySortProj(fromjson("{x: null}}"), BSONObj(), fromjson("{_id: 1}}"));
+ ASSERT_EQUALS(getNumSolutions(), 1U);
+ assertSolutionExists(
+ "{proj: {spec: {_id: 1}, node:"
+ "{fetch: {filter: {x: null}, node: {ixscan: "
+ "{pattern: {x: 1, _id: 1}, bounds: "
+ "{x: [[undefined,undefined,true,true],[null,null,true,true]]}}}}}}}");
+}
+
//
// Sparse indices, SERVER-8067
// Each index in this block of tests is sparse.