SERVER-55065: Cover null queries with index where possible

author: Alya Berciu <alyacarina@gmail.com> 2021-02-24 16:44:23 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-03-18 17:37:35 +0000
commit: 54a41757d77002b015c7bceca87c59f35e5a564f (patch)
tree: 8c29ab5e645e4bcfe0f3bac00844f760388cf213
parent: eee4362074775ebd142cfd671dcca312ddc31fe2 (diff)
download: mongo-54a41757d77002b015c7bceca87c59f35e5a564f.tar.gz
8 files changed, 575 insertions, 20 deletions
diff --git a/jstests/core/cover_null_queries.js b/jstests/core/cover_null_queries.js
new file mode 100644
index 00000000000..cfa04925b27
--- /dev/null
+++ b/jstests/core/cover_null_queries.js
@@ -0,0 +1,338 @@
+/**
+ * Test to verify that null queries can be fully covered by an index.
+ * @tags: [assumes_unsharded_collection, requires_fcv_49, requires_non_retryable_writes]
+ */
+(function() {
+"use strict";
+
+load("jstests/aggregation/extras/utils.js");  // For arrayEq().
+load("jstests/libs/analyze_plan.js");         // For getAggPlanStages() and getPlanStages().
+
+const coll = db.cover_null_queries;
+coll.drop();
+
+assert.commandWorked(coll.insertMany([
+    {_id: 1, a: 1, b: 1},
+    {_id: 2, a: 1, b: null},
+    {_id: 3, a: null, b: 1},
+    {_id: 4, a: null, b: null},
+    {_id: 5, a: 2},
+    {_id: 6, b: 2},
+    {_id: 7},
+]));
+
+/**
+ * Validates that the explain() of command 'cmdObj' has the stages in 'expectedStages'.
+ *
+ * The field 'expectedStages' should be specified as an object with keys matching the stages
+ * expected in the explain output and values indicating how many times each stage should be expected
+ * to appear in the output (useful to verify if there are two COUNT_SCANS or that there are 0
+ * IX_SCANS, for example).
+ *
+ * The field 'isAgg' is a boolean indicating whether or not the command is an aggregation.
+ */
+function validateStages({cmdObj, expectedStages, isAgg}) {
+    const explainObj = assert.commandWorked(coll.runCommand({explain: cmdObj}));
+    for (const [expectedStage, count] of Object.entries(expectedStages)) {
+        const planStages = isAgg
+            ? getAggPlanStages(explainObj, expectedStage, /* useQueryPlannerSection */ true)
+            : getPlanStages(explainObj, expectedStage);
+        assert.eq(planStages.length, count, planStages);
+        if (count > 0) {
+            for (const planStage of planStages) {
+                assert.eq(planStage.stage, expectedStage, planStage);
+            }
+        }
+    }
+}
+
+/**
+ * Runs find command with the given 'filter' and 'projection' and validates that the output returned
+ * matches 'expectedOutput'. Also runs explain() command on the same find command and validates that
+ * all the 'expectedStages' are present in the plan returned.
+ */
+function validateFindCmdOutputAndPlan({filter, projection, expectedStages, expectedOutput}) {
+    const cmdObj = {find: coll.getName(), filter: filter, projection: projection};
+    if (expectedOutput) {
+        const res = assert.commandWorked(coll.runCommand(cmdObj));
+        const ouputArray = new DBCommandCursor(coll.getDB(), res).toArray();
+        assert(arrayEq(expectedOutput, ouputArray), ouputArray);
+    }
+    validateStages({cmdObj, expectedStages});
+}
+
+/**
+ * Runs count command with the 'filter' and validates that the output returned matches
+ * 'expectedOutput'. Also runs explain() command and validates that all the 'expectedStages'
+ * are present in the plan returned.
+ */
+function validateSimpleCountCmdOutputAndPlan({filter, expectedStages, expectedCount}) {
+    const cmdObj = {count: coll.getName(), query: filter};
+    const res = assert.commandWorked(coll.runCommand(cmdObj));
+    assert.eq(res.n, expectedCount);
+    validateStages({cmdObj, expectedStages});
+}
+
+/**
+ * Runs an aggregation with a $count stage with the 'filter' applied to the $match stage and
+ * validates that the count returned matches 'expectedCount'. Also runs explain() command on the
+ * and validates that all the 'expectedStages' are present in the plan returned.
+ */
+function validateCountAggCmdOutputAndPlan({filter, expectedStages, expectedCount, pipeline}) {
+    const cmdObj = {
+        aggregate: coll.getName(),
+        pipeline: pipeline || [{$match: filter}, {$count: "count"}],
+        cursor: {},
+    };
+    const cmdRes = assert.commandWorked(coll.runCommand(cmdObj));
+    const countRes = cmdRes.cursor.firstBatch;
+    assert.eq(countRes.length, 1, cmdRes);
+    assert.eq(countRes[0].count, expectedCount, countRes);
+    validateStages({cmdObj, expectedStages, isAgg: true});
+}
+
+assert.commandWorked(coll.createIndex({a: 1, _id: 1}));
+
+// Verify count({a: null}) can be covered by an index. In the simplest case we can use two count
+// scans joined by an OR to evaluate it.
+validateSimpleCountCmdOutputAndPlan({
+    filter: {a: null},
+    expectedCount: 4,
+    expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0},
+});
+
+// Verify $count stage in aggregation matching {a: null} yields the same plan.
+validateCountAggCmdOutputAndPlan({
+    filter: {a: null},
+    expectedCount: 4,
+    expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0},
+});
+
+// Verify find({a: null}, {_id: 1}) can be covered by an index.
+validateFindCmdOutputAndPlan({
+    filter: {a: null},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1},
+});
+
+// Verify that a more complex projection that only relies on the _id field does not need a FETCH.
+validateFindCmdOutputAndPlan({
+    filter: {a: null},
+    projection: {_id: 1, incr_id: {$add: [1, "$_id"]}},
+    expectedOutput:
+        [{_id: 3, incr_id: 4}, {_id: 4, incr_id: 5}, {_id: 6, incr_id: 7}, {_id: 7, incr_id: 8}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_DEFAULT": 1},
+});
+
+// Verify that a more complex projection that computes a new field based on _id but excludes the _id
+// field does not require a fetch stage.
+validateFindCmdOutputAndPlan({
+    filter: {a: null},
+    projection: {_id: 0, incr_id: {$add: [1, "$_id"]}},
+    expectedOutput: [{incr_id: 4}, {incr_id: 5}, {incr_id: 7}, {incr_id: 8}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_DEFAULT": 1},
+});
+
+// Verify that a more complex projection that relies on any non-_id field does need a FETCH.
+validateFindCmdOutputAndPlan({
+    filter: {a: null},
+    projection: {_id: 1, incr_id: {$add: ["$a", "$_id"]}},
+    expectedOutput: [
+        {_id: 3, incr_id: null},
+        {_id: 4, incr_id: null},
+        {_id: 6, incr_id: null},
+        {_id: 7, incr_id: null}
+    ],
+    expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_DEFAULT": 1},
+});
+
+// Verify that an exclusion projection does need a FETCH.
+validateFindCmdOutputAndPlan({
+    filter: {a: null},
+    projection: {a: 0, b: 0},
+    expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_DEFAULT": 1},
+});
+
+// Verify find({a: null}, {_id: 1, b: 1}) is not covered by an index so we still have a FETCH stage.
+validateFindCmdOutputAndPlan({
+    filter: {a: null},
+    projection: {_id: 1, b: 1},
+    expectedOutput: [{_id: 3, b: 1}, {_id: 4, b: null}, {_id: 6, b: 2}, {_id: 7}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+
+// Verify find({a: null}, {a: 1}) still has a FETCH stage because the index alone cannot determine
+// if the value of field a is null, undefined, or missing.
+validateFindCmdOutputAndPlan({
+    filter: {a: null},
+    projection: {a: 1},
+    expectedOutput: [{_id: 3, a: null}, {_id: 4, a: null}, {_id: 6}, {_id: 7}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+
+// For exclusion projects we always need a FETCH stage.
+validateFindCmdOutputAndPlan({
+    filter: {a: null},
+    projection: {a: 1, _id: 0},
+    expectedOutput: [{a: null}, {a: null}, {}, {}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+
+// Verify that if the index is multikey, this optimization cannot be applied, as the index alone
+// cannot differentiate between null and [].
+assert.commandWorked(coll.insertOne({_id: 8, a: []}));
+validateSimpleCountCmdOutputAndPlan({
+    filter: {a: null, _id: 3},
+    expectedCount: 1,
+    expectedStages: {"FETCH": 1, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0}
+});
+validateCountAggCmdOutputAndPlan({
+    filter: {a: null, _id: 3},
+    expectedCount: 1,
+    expectedStages: {"FETCH": 1, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0},
+});
+validateFindCmdOutputAndPlan({
+    filter: {a: null, _id: 3},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 3}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+assert.commandWorked(coll.deleteOne({_id: 8}));
+
+// Test case when query is fully covered but we still need to fetch to correctly project a field.
+assert.commandWorked(coll.dropIndexes());
+assert.commandWorked(coll.createIndex({a: 1, _id: 1}));
+assert.commandWorked(coll.createIndex({a: 1, b: 1, _id: 1}));
+validateFindCmdOutputAndPlan({
+    filter: {a: null},
+    projection: {_id: 1, b: 1},
+    expectedOutput: [
+        {_id: 3, b: 1},
+        {_id: 4, b: null},
+        {_id: 6, b: 2},
+        {_id: 7},
+    ],
+    expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+
+// Test case when query is fully covered but predicate is not a single interval.
+assert.commandWorked(coll.dropIndexes());
+assert.commandWorked(coll.createIndex({a: 1, b: 1, _id: 1}));
+validateFindCmdOutputAndPlan({
+    filter: {a: {$in: [1, 2, 3]}, b: null},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 2}, {_id: 5}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1},
+});
+// Note that we can't use a COUNT_SCAN here because we have a complex interval.
+validateSimpleCountCmdOutputAndPlan({
+    filter: {a: {$in: [1, 2, 3]}, b: null},
+    expectedCount: 2,
+    expectedStages: {"IXSCAN": 1, "FETCH": 0},
+});
+validateCountAggCmdOutputAndPlan({
+    filter: {a: {$in: [1, 2, 3]}, b: null},
+    expectedCount: 2,
+    expectedStages: {"IXSCAN": 1, "FETCH": 0},
+});
+
+validateFindCmdOutputAndPlan({
+    filter: {a: 1, b: null},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 2}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+    filter: {a: 1, b: null},
+    expectedCount: 1,
+    expectedStages: {"OR": 1, "COUNT_SCAN": 2, "FETCH": 0},
+});
+validateCountAggCmdOutputAndPlan({
+    filter: {a: 1, b: null},
+    expectedCount: 1,
+    expectedStages: {"OR": 1, "COUNT_SCAN": 2, "FETCH": 0},
+});
+
+// Test case when counting nulls where documents are sorted in the opposite direction as the index.
+assert.commandWorked(coll.dropIndexes());
+assert.commandWorked(coll.createIndex({a: -1, b: -1}));
+validateCountAggCmdOutputAndPlan({
+    expectedCount: 2,
+    expectedStages: {"OR": 1, "COUNT_SCAN": 2, "FETCH": 0},
+    pipeline: /* Sort by field a in the opposite direction of the index. */
+        [{$match: {a: null, b: {$gt: 0}}}, {$sort: {a: 1}}, {$count: "count"}],
+});
+
+// Test case when query is fully covered, predicate is not a single interval, and the index does not
+// include the _id field. A find projection in this case will not be covered, but any count should
+// be covered.
+assert.commandWorked(coll.dropIndexes());
+assert.commandWorked(coll.createIndex({a: 1, b: 1}));
+validateFindCmdOutputAndPlan({
+    filter: {a: {$in: [1, 2, 3]}, b: null},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 2}, {_id: 5}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+// Note that we can't use a COUNT_SCAN here because we have a complex interval.
+validateSimpleCountCmdOutputAndPlan({
+    filter: {a: {$in: [1, 2, 3]}, b: null},
+    expectedCount: 2,
+    expectedStages: {"IXSCAN": 1, "FETCH": 0},
+});
+validateCountAggCmdOutputAndPlan({
+    filter: {a: {$in: [1, 2, 3]}, b: null},
+    expectedCount: 2,
+    expectedStages: {"IXSCAN": 1, "FETCH": 0},
+});
+
+// Test index intersection plan.
+assert.commandWorked(coll.dropIndexes());
+assert.commandWorked(coll.createIndex({a: 1, _id: 1}));
+assert.commandWorked(coll.createIndex({b: 1, _id: 1}));
+validateFindCmdOutputAndPlan({
+    filter: {a: null, b: null},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 4}, {_id: 7}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+validateFindCmdOutputAndPlan({
+    filter: {a: null, b: 1},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 3}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+
+// Verify the case where id field is accessed along a dotted path.
+// We still need a FETCH in this case because the index cannot differentiate between null and
+// missing values within _id, e.g. {_id: {x: null}} vs. {_id: {}} would both be returned as
+// {_id: {x: null}} by the index.
+assert.commandWorked(coll.dropIndexes());
+assert.commandWorked(coll.deleteMany({}));
+assert.commandWorked(coll.createIndex({a: 1, "_id.x": 1}));
+assert.commandWorked(coll.insertMany([
+    {a: null, _id: {x: 1}},
+    {a: null, _id: {x: 1, y: 1}},
+    {a: null, _id: {y: 1}},
+    {_id: {x: 1, y: 2}},
+    {a: "not null", _id: {x: 3}},
+]));
+validateFindCmdOutputAndPlan({
+    filter: {a: null},
+    projection: {"_id.x": 1},
+    expectedOutput: [{_id: {x: 1}}, {_id: {x: 1}}, {_id: {}}, {_id: {x: 1}}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_DEFAULT": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+    filter: {a: null},
+    expectedCount: 4,
+    expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0},
+});
+validateCountAggCmdOutputAndPlan({
+    filter: {a: null},
+    expectedCount: 4,
+    expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0},
+});
+})();
diff --git a/src/mongo/db/exec/count_scan.cpp b/src/mongo/db/exec/count_scan.cpp
index 8174e35ce2e..b37ac05a2d6 100644
--- a/src/mongo/db/exec/count_scan.cpp
+++ b/src/mongo/db/exec/count_scan.cpp
@@ -151,6 +151,8 @@ PlanStage::StageState CountScan::doWork(WorkingSetID* out) {
     }
 
     WorkingSetID id = _workingSet->allocate();
+    WorkingSetMember* member = _workingSet->get(id);
+    member->recordId = entry->loc;
     _workingSet->transitionToRecordIdAndObj(id);
     *out = id;
     return PlanStage::ADVANCED;
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 92874117844..a18f9e61d21 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -1577,6 +1577,25 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorUpda
 namespace {
 
 /**
+ * If the given 'bounds' contains a single null interval return its position, otherwise return
+ * boost::none.
+ */
+boost::optional<size_t> boundsHasExactlyOneNullInterval(const IndexBounds& bounds) {
+    boost::optional<size_t> nullFieldNo;
+    for (size_t fieldNo = 0; fieldNo < bounds.fields.size(); ++fieldNo) {
+        const OrderedIntervalList& oil = bounds.fields[fieldNo];
+        if (IndexBoundsBuilder::isNullInterval(oil)) {
+            // Return boost::none if we have multiple null intervals.
+            if (nullFieldNo) {
+                return boost::none;
+            }
+            nullFieldNo = fieldNo;
+        }
+    }
+    return nullFieldNo;
+}
+
+/**
  * Returns 'true' if the provided solution 'soln' can be rewritten to use
  * a fast counting stage.  Mutates the tree in 'soln->root'.
  *
@@ -1617,25 +1636,81 @@ bool turnIxscanIntoCount(QuerySolution* soln) {
     BSONObj endKey;
     bool endKeyInclusive;
 
+    auto makeCountScan = [&isn](BSONObj& csnStartKey,
+                                bool startKeyInclusive,
+                                BSONObj& csnEndKey,
+                                bool endKeyInclusive) {
+        // Since count scans return no data, they are always forward scans. Index scans, on the
+        // other hand, may need to scan the index in reverse order in order to obtain a sort. If the
+        // index scan direction is backwards, then we need to swap the start and end of the count
+        // scan bounds.
+        if (isn->direction < 0) {
+            csnStartKey.swap(csnEndKey);
+            std::swap(startKeyInclusive, endKeyInclusive);
+        }
+
+        auto csn = std::make_unique<CountScanNode>(isn->index);
+        csn->startKey = csnStartKey;
+        csn->startKeyInclusive = startKeyInclusive;
+        csn->endKey = csnEndKey;
+        csn->endKeyInclusive = endKeyInclusive;
+        return csn;
+    };
+
     if (!IndexBoundsBuilder::isSingleInterval(
             isn->bounds, &startKey, &startKeyInclusive, &endKey, &endKeyInclusive)) {
-        return false;
-    }
+        // If we have exactly one null interval, we should split the bounds and try to construct
+        // two COUNT_SCAN stages joined by an OR stage. If we had multiple null intervals, we would
+        // need 2^N count scans for N intervals, meaning this would quickly explode to a
+        // point where it would just be more efficient to use a single index scan. Consequently, we
+        // draw the line at one null interval.
+        if (auto nullFieldNo = boundsHasExactlyOneNullInterval(isn->bounds)) {
+            OrderedIntervalList undefinedPointOil, nullPointOil;
+            undefinedPointOil.intervals.push_back(IndexBoundsBuilder::kUndefinedPointInterval);
+            nullPointOil.intervals.push_back(IndexBoundsBuilder::kNullPointInterval);
+
+            tassert(5506501,
+                    "The index of the null interval is invalid",
+                    *nullFieldNo < isn->bounds.fields.size());
+            auto makeNullBoundsCountScan =
+                [&](OrderedIntervalList& oil) -> std::unique_ptr<QuerySolutionNode> {
+                std::swap(isn->bounds.fields[*nullFieldNo], oil);
+                ON_BLOCK_EXIT([&] { std::swap(isn->bounds.fields[*nullFieldNo], oil); });
+
+                BSONObj startKey, endKey;
+                bool startKeyInclusive, endKeyInclusive;
+                if (IndexBoundsBuilder::isSingleInterval(
+                        isn->bounds, &startKey, &startKeyInclusive, &endKey, &endKeyInclusive)) {
+                    return makeCountScan(startKey, startKeyInclusive, endKey, endKeyInclusive);
+                }
+
+                return nullptr;
+            };
+
+            auto undefinedCsn = makeNullBoundsCountScan(undefinedPointOil);
+
+            if (undefinedCsn) {
+                // If undefinedCsn is non-null, then we should also be able to successfully generate
+                // a count scan for the null interval case.
+                auto nullCsn = makeNullBoundsCountScan(nullPointOil);
+                tassert(5506500, "Invalid null bounds COUNT_SCAN", nullCsn);
 
-    // Since count scans return no data, they are always forward scans. Index scans, on the other
-    // hand, may need to scan the index in reverse order in order to obtain a sort. If the index
-    // scan direction is backwards, then we need to swap the start and end of the count scan bounds.
-    if (isn->direction < 0) {
-        startKey.swap(endKey);
-        std::swap(startKeyInclusive, endKeyInclusive);
+                auto csns = makeVector(std::move(undefinedCsn), std::move(nullCsn));
+
+                // Note that there is no need to deduplicate here because this optimization cannot
+                // be applied to multikey indexes.
+                auto orn = std::make_unique<OrNode>();
+                orn->addChildren(std::move(csns));
+                soln->setRoot(std::move(orn));
+
+                return true;
+            }
+        }
+        return false;
     }
 
     // Make the count node that we replace the fetch + ixscan with.
-    auto csn = std::make_unique<CountScanNode>(isn->index);
-    csn->startKey = startKey;
-    csn->startKeyInclusive = startKeyInclusive;
-    csn->endKey = endKey;
-    csn->endKeyInclusive = endKeyInclusive;
+    auto csn = makeCountScan(startKey, startKeyInclusive, endKey, endKeyInclusive);
     // Takes ownership of 'cn' and deletes the old root.
     soln->setRoot(std::move(csn));
     return true;
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index aa05e986aed..4176823172e 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -129,11 +129,10 @@ const Interval kHashedNullInterval =
     IndexBoundsBuilder::makePointInterval(ExpressionMapping::hash(kNullElementObj.firstElement()));
 
 Interval makeUndefinedPointInterval(bool isHashed) {
-    return isHashed ? kHashedUndefinedInterval
-                    : IndexBoundsBuilder::makePointInterval(kUndefinedElementObj);
+    return isHashed ? kHashedUndefinedInterval : IndexBoundsBuilder::kUndefinedPointInterval;
 }
 Interval makeNullPointInterval(bool isHashed) {
-    return isHashed ? kHashedNullInterval : IndexBoundsBuilder::makePointInterval(kNullElementObj);
+    return isHashed ? kHashedNullInterval : IndexBoundsBuilder::kNullPointInterval;
 }
 
 void makeNullEqualityBounds(const IndexEntry& index,
@@ -501,6 +500,11 @@ void buildBoundsForQueryElementForGT(BSONElement dataElt,
 
 }  // namespace
 
+const Interval IndexBoundsBuilder::kUndefinedPointInterval =
+    IndexBoundsBuilder::makePointInterval(kUndefinedElementObj);
+const Interval IndexBoundsBuilder::kNullPointInterval =
+    IndexBoundsBuilder::makePointInterval(kNullElementObj);
+
 void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr,
                                              const BSONElement& elt,
                                              const IndexEntry& index,
@@ -1455,4 +1459,12 @@ bool IndexBoundsBuilder::isSingleInterval(const IndexBounds& bounds,
     }
 }
 
+// static
+bool IndexBoundsBuilder::isNullInterval(const OrderedIntervalList& oil) {
+    // Checks if the the intervals are [undefined, undefined] and [null, null].
+    // Note: the order is always the same (see makeNullEqualityBounds()).
+    return 2 == oil.intervals.size() && oil.intervals[0].equals(kUndefinedPointInterval) &&
+        oil.intervals[1].equals(kNullPointInterval);
+}
+
 }  // namespace mongo
diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h
index 7f141600837..67205d64e7a 100644
--- a/src/mongo/db/query/index_bounds_builder.h
+++ b/src/mongo/db/query/index_bounds_builder.h
@@ -44,6 +44,9 @@ class CollatorInterface;
  */
 class IndexBoundsBuilder {
 public:
+    static const Interval kUndefinedPointInterval;
+    static const Interval kNullPointInterval;
+
     /**
      * Describes various degrees of precision with which predicates can be evaluated based
      * on the index bounds.
@@ -209,6 +212,12 @@ public:
                                  bool* endKeyInclusive);
 
     /**
+     * Returns 'true' if the ordered intervals 'oil' represent a strict null equality predicate.
+     * Returns 'false' otherwise.
+     */
+    static bool isNullInterval(const OrderedIntervalList& oil);
+
+    /**
      * Appends the startKey and endKey of the given "all values" 'interval' (which is either
      * [MinKey, MaxKey] or [MaxKey, MinKey] interval) to the 'startBob' and 'endBob' respectively,
      * handling inclusivity of each bound through the relevant '*KeyInclusive' parameter.
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index 46762805cec..d7627235ef8 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -937,6 +937,62 @@ std::vector<std::unique_ptr<QuerySolutionNode>> QueryPlannerAccess::collapseEqui
     return collapsedScans;
 }
 
+/**
+ * Returns true if this is a null query that can retrieve all the information it needs directly from
+ * the index, and so does not need a FETCH stage on top of it. Returns false otherwise.
+ */
+bool isCoveredNullQuery(const CanonicalQuery& query,
+                        MatchExpression* root,
+                        IndexTag* tag,
+                        const vector<IndexEntry>& indices,
+                        const QueryPlannerParams& params) {
+    // We are only interested in queries checking for an indexed field equalling null.
+    // This optimization can only be done when the index is not multikey, otherwise empty arrays
+    // in the collection will be treated as null/undefined by the index. Additionally,
+    // sparse indexes and hashed indexes should not use this optimization as they will require a
+    // FETCH stage with a filter.
+    if (indices[tag->index].multikey || indices[tag->index].sparse ||
+        indices[tag->index].type == IndexType::INDEX_HASHED ||
+        !ComparisonMatchExpressionBase::isEquality(root->matchType())) {
+        return false;
+    }
+
+    // Check if the query is looking for null values.
+    const auto node = static_cast<const ComparisonMatchExpressionBase*>(root);
+    if (node->getData().type() != BSONType::jstNULL) {
+        return false;
+    }
+
+    // If nothing is being projected, the query is fully covered without a fetch.
+    // This is trivially true for a count query.
+    if (params.options & QueryPlannerParams::Options::IS_COUNT) {
+        return true;
+    }
+
+    // This optimization can only be used for find when the index covers the projection completely.
+    // However, if the indexed field is in the projection, the index may return an incorrect value
+    // for the field, since it does not distinguish between null and undefined. Hence, only find
+    // queries projecting _id are covered.
+    auto proj = query.getProj();
+    if (!proj) {
+        return false;
+    }
+
+    // We can cover projections on _id and generated fields and expressions depending only on _id.
+    // However, if the projection is an exclusion, requires match details, requires the full
+    // document, or requires metadata, we will still need a FETCH stage.
+    if (proj->type() == projection_ast::ProjectType::kInclusion && !proj->requiresMatchDetails() &&
+        proj->metadataDeps().none() && !proj->requiresDocument()) {
+        auto projFields = proj->getRequiredFields();
+        // Note that it is not possible to project onto dotted paths of _id here, since they may be
+        // null or missing, and the index cannot differentiate between the two cases, so we would
+        // still need a FETCH stage.
+        return projFields.size() == 1 && projFields[0] == "_id";
+    }
+
+    return false;
+}
+
 bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
                                            MatchExpression* root,
                                            bool inArrayOperator,
@@ -944,7 +1000,7 @@ bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
                                            const QueryPlannerParams& params,
                                            std::vector<std::unique_ptr<QuerySolutionNode>>* out) {
     // Initialize the ScanBuildingState.
-    ScanBuildingState scanState(root, inArrayOperator, indices);
+    ScanBuildingState scanState(root, indices, inArrayOperator);
 
     while (scanState.curChild < root->numChildren()) {
         MatchExpression* child = root->getChild(scanState.curChild);
@@ -978,6 +1034,11 @@ bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
         // If we're here, we now know that 'child' can use an index directly and the index is
         // over the child's field.
 
+        // We need to track if this is a covered null query so that we can have this information
+        // at hand when handling the filter on an indexed AND.
+        scanState.isCoveredNullQuery =
+            isCoveredNullQuery(query, child, scanState.ixtag, indices, params);
+
         // If 'child' is a NOT, then the tag we're interested in is on the NOT's
         // child node.
         if (MatchExpression::NOT == child->matchType()) {
@@ -1437,7 +1498,11 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::_buildIndexedDataAccess(
             // superset of documents that satisfy the predicate, and we must check the
             // predicate.
 
-            if (tightness == IndexBoundsBuilder::EXACT) {
+            // We may also be able to avoid adding an extra fetch stage even though the bounds are
+            // inexact because the query is counting null values on an indexed field without
+            // projecting that field.
+            if (tightness == IndexBoundsBuilder::EXACT ||
+                isCoveredNullQuery(query, root, tag, indices, params)) {
                 return soln;
             } else if (tightness == IndexBoundsBuilder::INEXACT_COVERED &&
                        !indices[tag->index].multikey) {
@@ -1584,8 +1649,13 @@ void QueryPlannerAccess::handleFilterAnd(ScanBuildingState* scanState) {
         // should always be affixed as a filter. We keep 'curChild' in the $and
         // for affixing later.
         ++scanState->curChild;
-    } else if (scanState->tightness == IndexBoundsBuilder::EXACT) {
+    } else if (scanState->tightness == IndexBoundsBuilder::EXACT || scanState->isCoveredNullQuery) {
+        // The tightness of the bounds is exact or we are dealing with a covered null query.
+        // Either way, we want to remove this child so that when control returns to handleIndexedAnd
+        // we know that we don't need it to create a FETCH stage.
         root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild);
+        // Since we are not attaching this child to a filter or tracking it anywhere else, it is
+        // safe to delete it here.
         delete child;
     } else if (scanState->tightness == IndexBoundsBuilder::INEXACT_COVERED &&
                (INDEX_TEXT == index.type || !index.multikey)) {
diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h
index 3c9b03ff4d0..ff01a3fa79d 100644
--- a/src/mongo/db/query/planner_access.h
+++ b/src/mongo/db/query/planner_access.h
@@ -138,10 +138,12 @@ private:
      */
     struct ScanBuildingState {
         ScanBuildingState(MatchExpression* theRoot,
+                          const std::vector<IndexEntry>& indexList,
                           bool inArrayOp,
-                          const std::vector<IndexEntry>& indexList)
+                          bool isCoveredNull = false)
             : root(theRoot),
               inArrayOperator(inArrayOp),
+              isCoveredNullQuery(isCoveredNull),
               indices(indexList),
               currentScan(nullptr),
               curChild(0),
@@ -174,6 +176,9 @@ private:
         // Are we inside an array operator such as $elemMatch or $all?
         bool inArrayOperator;
 
+        // Is this a covered null query?
+        bool isCoveredNullQuery;
+
         // A list of relevant indices which 'root' may be tagged to use.
         const std::vector<IndexEntry>& indices;
 
diff --git a/src/mongo/db/query/query_planner_index_test.cpp b/src/mongo/db/query/query_planner_index_test.cpp
index a0a725910c9..b6232696491 100644
--- a/src/mongo/db/query/query_planner_index_test.cpp
+++ b/src/mongo/db/query/query_planner_index_test.cpp
@@ -57,6 +57,50 @@ TEST_F(QueryPlannerTest, PlannerAddsFetchToIxscanForCountWhenFetchFilterNonempty
         "{pattern: {x: 1}, bounds: {x: [[5,5,true,true]]}}}}}");
 }
 
+TEST_F(QueryPlannerTest, PlannerUsesCoveredIxscanForCountWhenIndexSatisfiesNullQuery) {
+    params.options = QueryPlannerParams::IS_COUNT;
+    addIndex(BSON("x" << 1));
+    runQuery(fromjson("{x: null}"));
+    ASSERT_EQUALS(getNumSolutions(), 1U);
+    assertSolutionExists(
+        "{ixscan: {pattern: {x: 1}, bounds: "
+        "{x: [[undefined,undefined,true,true],[null,null,true,true]]}}}");
+}
+
+TEST_F(QueryPlannerTest, PlannerAddsFetchForCountWhenMultikeyIndexSatisfiesNullQuery) {
+    params.options = QueryPlannerParams::IS_COUNT;
+    addIndex(BSON("x" << 1), true);
+    runQuery(fromjson("{x: null}"));
+    ASSERT_EQUALS(getNumSolutions(), 1U);
+    assertSolutionExists(
+        "{fetch: {filter: {x: null}, node: {ixscan: "
+        "{pattern: {x: 1}, bounds: "
+        "{x: [[undefined,undefined,true,true],[null,null,true,true]]}}}}}");
+}
+
+TEST_F(QueryPlannerTest, PlannerUsesCoveredIxscanFindWhenIndexSatisfiesNullQuery) {
+    params.options &= ~QueryPlannerParams::INCLUDE_COLLSCAN;
+    addIndex(fromjson("{x: 1, _id: 1}"));
+    runQuerySortProj(fromjson("{x: null}}"), BSONObj(), fromjson("{_id: 1}}"));
+    ASSERT_EQUALS(getNumSolutions(), 1U);
+    assertSolutionExists(
+        "{proj: {spec: {_id: 1}, node:"
+        "{ixscan: {pattern: {x: 1, _id: 1}, bounds: "
+        "{x: [[undefined,undefined,true,true],[null,null,true,true]]}}}}}");
+}
+
+TEST_F(QueryPlannerTest, PlannerAddsFetchForFindWhenMultikeyIndexSatisfiesNullQuery) {
+    params.options &= ~QueryPlannerParams::INCLUDE_COLLSCAN;
+    addIndex(fromjson("{x: 1, _id: 1}"), true);
+    runQuerySortProj(fromjson("{x: null}}"), BSONObj(), fromjson("{_id: 1}}"));
+    ASSERT_EQUALS(getNumSolutions(), 1U);
+    assertSolutionExists(
+        "{proj: {spec: {_id: 1}, node:"
+        "{fetch: {filter: {x: null}, node: {ixscan: "
+        "{pattern: {x: 1, _id: 1}, bounds: "
+        "{x: [[undefined,undefined,true,true],[null,null,true,true]]}}}}}}}");
+}
+
 //
 // Sparse indices, SERVER-8067
 // Each index in this block of tests is sparse.
author	Alya Berciu <alyacarina@gmail.com>	2021-02-24 16:44:23 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2021-03-18 17:37:35 +0000
commit	54a41757d77002b015c7bceca87c59f35e5a564f (patch)
tree	8c29ab5e645e4bcfe0f3bac00844f760388cf213
parent	eee4362074775ebd142cfd671dcca312ddc31fe2 (diff)
download	mongo-54a41757d77002b015c7bceca87c59f35e5a564f.tar.gz