SERVER-70436 Handle covered $or null queries with regex

author: Alya Berciu <alya.berciu@mongodb.com> 2022-11-08 16:37:07 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-11-08 17:13:58 +0000
commit: 7e3381f8a26e6bb01d4715961f6babadff651de0 (patch)
tree: f19bceb6c5a3f711e349c7ab50ea4f5f4e5dc2f9
parent: d8329fbe00da54b5f8b0f60889364e7920968756 (diff)
download: mongo-7e3381f8a26e6bb01d4715961f6babadff651de0.tar.gz
10 files changed, 644 insertions, 148 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 2bcfc26a6e9..6b118574a7f 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -220,6 +220,8 @@ last-continuous:
     ticket: SERVER-68728
   - test_file: jstests/aggregation/expressions/switch_errors.js
     ticket: SERVER-70190
+  - test_file: jstests/core/cover_null_queries.js
+    ticket: SERVER-70436
   suites: null
 last-lts:
   all:
@@ -517,4 +519,6 @@ last-lts:
     ticket: SERVER-68728
   - test_file: jstests/aggregation/expressions/switch_errors.js
     ticket: SERVER-70190
+  - test_file: jstests/core/cover_null_queries.js
+    ticket: SERVER-70436
   suites: null
diff --git a/jstests/core/cover_null_queries.js b/jstests/core/cover_null_queries.js
index 50bf0b58b07..a05cb8ec3f8 100644
--- a/jstests/core/cover_null_queries.js
+++ b/jstests/core/cover_null_queries.js
@@ -3,7 +3,9 @@
  * @tags: [
  *   assumes_unsharded_collection,
  *   requires_non_retryable_writes,
- *   requires_fcv_52
+ *   requires_fcv_60,
+ *   # This test could produce unexpected explain output if additional indexes are created.
+ *   assumes_no_implicit_index_creation,
  * ]
  */
 (function() {
@@ -57,12 +59,22 @@ function validateStages({cmdObj, expectedStages, isAgg}) {
  */
 function validateFindCmdOutputAndPlan({filter, projection, expectedStages, expectedOutput}) {
     const cmdObj = {find: coll.getName(), filter: filter, projection: projection};
+
+    // Compare index output with expected output.
     if (expectedOutput) {
         const res = assert.commandWorked(coll.runCommand(cmdObj));
         const ouputArray = new DBCommandCursor(coll.getDB(), res).toArray();
         assert(arrayEq(expectedOutput, ouputArray), ouputArray);
     }
+
+    // Validate explain.
     validateStages({cmdObj, expectedStages});
+
+    // Verify that we get the same output as we expect without an index.
+    const noIndexCmdObj = Object.assign(cmdObj, {hint: {$natural: 1}});
+    const resNoIndex = assert.commandWorked(coll.runCommand(noIndexCmdObj));
+    const noIndexOutArr = new DBCommandCursor(coll.getDB(), resNoIndex).toArray();
+    assert(arrayEq(expectedOutput, noIndexOutArr), noIndexOutArr);
 }
 
 /**
@@ -71,10 +83,18 @@ function validateFindCmdOutputAndPlan({filter, projection, expectedStages, expec
  * are present in the plan returned.
  */
 function validateSimpleCountCmdOutputAndPlan({filter, expectedStages, expectedCount}) {
+    // Compare index output with expected output.
     const cmdObj = {count: coll.getName(), query: filter};
     const res = assert.commandWorked(coll.runCommand(cmdObj));
     assert.eq(res.n, expectedCount);
+
+    // Validate explain.
     validateStages({cmdObj, expectedStages});
+
+    // Verify that we get the same output with and without an index.
+    const noIndexCmdObj = Object.assign(cmdObj, {hint: {$natural: 1}});
+    const resNoIndex = assert.commandWorked(coll.runCommand(noIndexCmdObj));
+    assert.eq(resNoIndex.n, expectedCount);
 }
 
 /**
@@ -88,11 +108,33 @@ function validateCountAggCmdOutputAndPlan({filter, expectedStages, expectedCount
         pipeline: pipeline || [{$match: filter}, {$count: "count"}],
         cursor: {},
     };
+
+    // Compare index output with expected output.
     const cmdRes = assert.commandWorked(coll.runCommand(cmdObj));
     const countRes = cmdRes.cursor.firstBatch;
     assert.eq(countRes.length, 1, cmdRes);
     assert.eq(countRes[0].count, expectedCount, countRes);
+
+    // Validate explain.
     validateStages({cmdObj, expectedStages, isAgg: true});
+
+    // Verify that we get the same output as we expect without an index.
+    const noIndexCmdObj = Object.assign(cmdObj, {hint: {$natural: 1}});
+    const resNoIndex = assert.commandWorked(coll.runCommand(noIndexCmdObj));
+    const countResNoIndex = resNoIndex.cursor.firstBatch;
+    assert.eq(countResNoIndex.length, 1, cmdRes);
+    assert.eq(countResNoIndex[0].count, expectedCount, countRes);
+}
+
+/**
+ * Same as above, but uses a $group count.
+ */
+function validateGroupCountAggCmdOutputAndPlan({filter, expectedStages, expectedCount}) {
+    validateCountAggCmdOutputAndPlan({
+        expectedStages,
+        expectedCount,
+        pipeline: [{$match: filter}, {$group: {_id: 0, count: {$count: {}}}}]
+    });
 }
 
 assert.commandWorked(coll.createIndex({a: 1, _id: 1}));
@@ -152,7 +194,7 @@ validateFindCmdOutputAndPlan({
     expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1},
 });
 
-// Same as above, but special case for null and empty array predicate.
+// We can cover a $in with null and an empty array predicate.
 validateFindCmdOutputAndPlan({
     filter: {a: {$in: [null, []]}},
     projection: {_id: 1},
@@ -165,6 +207,21 @@ validateSimpleCountCmdOutputAndPlan({
     expectedStages: {"IXSCAN": 1, "FETCH": 0},
 });
 
+// We cannot cover a $in with null and an array predicate.
+// TODO SERVER-71058: It should be possible to cover this case and the more general case of matching
+// an array on a non-multikey index.
+validateFindCmdOutputAndPlan({
+    filter: {a: {$in: [null, ["a"]]}},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+    filter: {a: {$in: [null, ["a"]]}},
+    expectedCount: 4,
+    expectedStages: {"IXSCAN": 1, "FETCH": 1, "COUNT": 1},
+});
+
 // Verify that a more complex projection that only relies on the _id field does not need a FETCH.
 validateFindCmdOutputAndPlan({
     filter: {a: null},
@@ -691,4 +748,265 @@ validateCountAggCmdOutputAndPlan({
     expectedCount: 4,
     expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0},
 });
+
+// Validate that we can use the optimization when we have regex without array elements in a $in or
+// $or. See SERVER-70436 for more details.
+coll.drop();
+
+assert.commandWorked(coll.insertMany([
+    {_id: 1, a: '123456'},
+    {_id: 2, a: '1234567'},
+    {_id: 3, a: ' 12345678'},
+    {_id: 4, a: '444456'},
+    {_id: 5, a: ''},
+    {_id: 6, a: null},
+    {_id: 7},
+]));
+
+assert.commandWorked(coll.createIndex({a: 1, _id: 1}));
+
+// TODO SERVER-70998: Can apply optimization in case without regex; however, we still can't use a
+// COUNT_SCAN in this case.
+validateFindCmdOutputAndPlan({
+    filter: {$or: [{a: null}, {a: ""}]},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 5}, {_id: 6}, {_id: 7}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+    filter: {$or: [{a: null}, {a: ""}]},
+    expectedCount: 3,
+    expectedStages: {"COUNT": 1, "IXSCAN": 1, "FETCH": 0},
+});
+validateGroupCountAggCmdOutputAndPlan({
+    filter: {$or: [{a: null}, {a: ""}]},
+    expectedCount: 3,
+    expectedStages: {"IXSCAN": 1, "FETCH": 0},
+});
+
+// Can still apply optimization when we have regex.
+validateFindCmdOutputAndPlan({
+    filter: {$or: [{a: null}, {a: {$regex: "^$"}}]},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 5}, {_id: 6}, {_id: 7}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+    filter: {$or: [{a: null}, {a: {$regex: "^$"}}]},
+    expectedCount: 3,
+    expectedStages: {"IXSCAN": 1, "FETCH": 0, "COUNT": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+    filter: {$or: [{a: null}, {a: {$regex: "^$"}}]},
+    expectedCount: 3,
+    expectedStages: {"IXSCAN": 1, "FETCH": 0},
+});
+
+// Now test case with a multikey index. We can't leverage the optimization here.
+assert.commandWorked(coll.insert({_id: 8, a: [1, 2, 3]}));
+assert.commandWorked(coll.insert({_id: 9, a: []}));
+
+validateFindCmdOutputAndPlan({
+    filter: {$or: [{a: null}, {a: []}, {a: {$regex: "^$"}}]},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 5}, {_id: 6}, {_id: 7}, {_id: 9}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+    filter: {$or: [{a: null}, {a: []}, {a: {$regex: "^$"}}]},
+    expectedCount: 4,
+    expectedStages: {"COUNT": 1, "IXSCAN": 1, "FETCH": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+    filter: {$or: [{a: null}, {a: []}, {a: {$regex: "^$"}}]},
+    expectedCount: 4,
+    expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
+
+// We also shouldn't cover queries on multikey indexes where $in includes an array, as we will still
+// need a filter after the IXSCAN to correctly return
+validateFindCmdOutputAndPlan({
+    filter: {$or: [{a: null}, {a: []}, {a: [2]}]},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 6}, {_id: 7}, {_id: 9}],
+    expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+    filter: {$or: [{a: null}, {a: []}, {a: [2]}]},
+    expectedCount: 3,
+    expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+    filter: {$or: [{a: null}, {a: []}, {a: [2]}]},
+    expectedCount: 3,
+    expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
+
+// Validate that when we have a dotted path, we return the correct results for null queries.
+coll.drop();
+assert.commandWorked(coll.insertMany([
+    {_id: 1, a: 1},
+    {_id: 2, a: null},
+    {_id: 3},
+    {_id: 4, a: {b: 1}},
+    {_id: 5, a: {b: null}},
+    {_id: 6, a: {c: 1}},
+]));
+assert.commandWorked(coll.createIndex({"a.b": 1, _id: 1}));
+
+validateFindCmdOutputAndPlan({
+    filter: {"a.b": null},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 1}, {_id: 2}, {_id: 3}, {_id: 5}, {_id: 6}],
+    expectedStages: {"IXSCAN": 1, "PROJECTION_COVERED": 1, "FETCH": 0},
+});
+validateSimpleCountCmdOutputAndPlan({
+    filter: {"a.b": null},
+    expectedCount: 5,
+    expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0},
+});
+validateGroupCountAggCmdOutputAndPlan({
+    filter: {"a.b": null},
+    expectedCount: 5,
+    expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0},
+});
+
+validateFindCmdOutputAndPlan({
+    filter: {a: {b: null}},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 5}],
+    expectedStages: {"COLLSCAN": 1, "PROJECTION_SIMPLE": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+    filter: {a: {b: null}},
+    expectedCount: 1,
+    expectedStages: {"COLLSCAN": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+    filter: {a: {b: null}},
+    expectedCount: 1,
+    expectedStages: {"COLLSCAN": 1},
+});
+
+// Still need fetch if we don't have a sufficiently restrictive projection.
+validateFindCmdOutputAndPlan({
+    filter: {"a.b": null},
+    projection: {_id: 1, a: 1},
+    expectedOutput: [
+        {_id: 1, a: 1},
+        {_id: 2, a: null},
+        {_id: 3},
+        {_id: 5, a: {b: null}},
+        {_id: 6, a: {c: 1}},
+    ],
+    expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
+
+// Make index multikey, and test case where field b is nested in an array.
+assert.commandWorked(coll.insertMany([
+    {_id: 7, a: [{b: null}]},
+    {_id: 8, a: [{b: []}]},
+    {_id: 9, a: [{b: [1, 2, 3]}]},
+    {_id: 10, a: [{b: 123}]},
+    {_id: 11, a: [{c: 123}]},
+    {_id: 12, a: []},
+    {_id: 13, a: [{}]},
+    {_id: 14, a: [1, 2, 3]},
+    {_id: 15, a: [{b: 1}, {c: 2}, {b: 3}]},
+    {_id: 16, a: [null]},
+]));
+
+validateFindCmdOutputAndPlan({
+    filter: {"a.b": null},
+    projection: {_id: 1},
+    expectedOutput: [
+        {_id: 1},
+        {_id: 2},
+        {_id: 3},
+        {_id: 5},
+        {_id: 6},
+        {_id: 7},
+        {_id: 11},
+        {_id: 13},
+        {_id: 15}
+    ],
+    expectedStages: {"IXSCAN": 1, "PROJECTION_SIMPLE": 1, "FETCH": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+    filter: {"a.b": null},
+    expectedCount: 9,
+    expectedStages: {"COUNT": 1, "IXSCAN": 1, "FETCH": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+    filter: {"a.b": null},
+    expectedCount: 9,
+    expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
+
+validateFindCmdOutputAndPlan({
+    filter: {a: {b: null}},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 5}, {_id: 7}],
+    expectedStages: {
+        "COLLSCAN": 1,
+        "PROJECTION_SIMPLE": 1,
+    },
+});
+validateSimpleCountCmdOutputAndPlan({
+    filter: {a: {b: null}},
+    expectedCount: 2,
+    expectedStages: {"COLLSCAN": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+    filter: {a: {b: null}},
+    expectedCount: 2,
+    expectedStages: {"COLLSCAN": 1},
+});
+
+validateFindCmdOutputAndPlan({
+    filter: {a: [{b: null}]},
+    projection: {_id: 1},
+    expectedOutput: [{_id: 7}],
+    expectedStages: {"COLLSCAN": 1, "PROJECTION_SIMPLE": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+    filter: {a: [{b: null}]},
+    expectedCount: 1,
+    expectedStages: {"COLLSCAN": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+    filter: {a: [{b: null}]},
+    expectedCount: 1,
+    expectedStages: {"COLLSCAN": 1},
+});
+
+// We still need a FETCH for composite paths, because both {a: [1,2,3]} and {"a.b": null} generate
+// null index keys, but the former should not match the predicate below.
+validateFindCmdOutputAndPlan({
+    filter: {"a.b": {$in: [null, []]}},
+    projection: {_id: 1},
+    expectedOutput: [
+        {_id: 1},
+        {_id: 2},
+        {_id: 3},
+        {_id: 5},
+        {_id: 6},
+        {_id: 7},
+        {_id: 8},
+        {_id: 11},
+        {_id: 13},
+        {_id: 15}
+    ],
+    expectedStages: {"IXSCAN": 1, "PROJECTION_SIMPLE": 1, "FETCH": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+    filter: {"a.b": {$in: [null, []]}},
+    expectedCount: 10,
+    expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+    filter: {"a.b": {$in: [null, []]}},
+    expectedCount: 10,
+    expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
 })();
diff --git a/jstests/core/null_query_semantics.js b/jstests/core/null_query_semantics.js
index 1ff14cc710c..aa9042c5ac5 100644
--- a/jstests/core/null_query_semantics.js
+++ b/jstests/core/null_query_semantics.js
@@ -41,6 +41,9 @@ function testNullSemantics(coll) {
             [{_id: "a_null", a: null}, {_id: "a_undefined", a: undefined}, {_id: "no_a"}];
         assert(resultsEq(expected, noProjectResults), tojson(noProjectResults));
 
+        const count = coll.count({a: {$eq: null}});
+        assert.eq(count, expected.length);
+
         const projectResults = coll.find({a: {$eq: null}}, projectToOnlyA).toArray();
         assert(resultsEq(projectResults, extractAValues(expected)), tojson(projectResults));
     }());
@@ -57,6 +60,9 @@ function testNullSemantics(coll) {
         ];
         assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
 
+        const count = coll.count({a: {$ne: null}});
+        assert.eq(count, expected.length);
+
         const projectResults = coll.find({a: {$ne: null}}, projectToOnlyA).toArray();
         assert(resultsEq(projectResults, extractAValues(expected)), tojson(projectResults));
     }());
@@ -72,6 +78,9 @@ function testNullSemantics(coll) {
             {_id: "no_a"},
         ];
 
+        const count = coll.count(query);
+        assert.eq(count, expected.length);
+
         assert(resultsEq(noProjectResults, expected), noProjectResults);
 
         const projectResults = coll.find(query, projectToOnlyA).toArray();
@@ -89,6 +98,9 @@ function testNullSemantics(coll) {
             {_id: "no_a"},
         ];
 
+        const count = coll.count(query);
+        assert.eq(count, expected.length);
+
         assert(resultsEq(noProjectResults, expected), noProjectResults);
 
         const projectResults = coll.find(query, projectToOnlyA).toArray();
@@ -106,6 +118,9 @@ function testNullSemantics(coll) {
             {_id: "a_subobject_b_undefined", a: {b: undefined}},
         ];
 
+        const count = coll.count(query);
+        assert.eq(count, expected.length);
+
         assert(resultsEq(noProjectResults, expected), noProjectResults);
 
         const projectResults = coll.find(query, projectToOnlyA).toArray();
@@ -126,6 +141,9 @@ function testNullSemantics(coll) {
         ];
         assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
 
+        const count = coll.count(query);
+        assert.eq(count, expected.length);
+
         const projectResults = coll.find(query, projectToOnlyA).toArray();
         assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
     }());
@@ -137,6 +155,9 @@ function testNullSemantics(coll) {
         ];
         assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
 
+        const count = coll.count({a: {$exists: false}});
+        assert.eq(count, expected.length);
+
         const projectResults = coll.find({a: {$exists: false}}, projectToOnlyA).toArray();
         assert(resultsEq(projectResults, extractAValues(expected)), tojson(projectResults));
     }());
@@ -144,17 +165,19 @@ function testNullSemantics(coll) {
     // Test the semantics of the query {"a.b": {$eq: null}}.
     (function testDottedEqualsNull() {
         const noProjectResults = coll.find({"a.b": {$eq: null}}).toArray();
-        assert(resultsEq(noProjectResults,
-                         [
-                             {_id: "a_empty_subobject", a: {}},
-                             {_id: "a_null", a: null},
-                             {_id: "a_number", a: 4},
-                             {_id: "a_subobject_b_null", a: {b: null}},
-                             {_id: "a_subobject_b_undefined", a: {b: undefined}},
-                             {_id: "a_undefined", a: undefined},
-                             {_id: "no_a"}
-                         ]),
-               tojson(noProjectResults));
+        const expected = [
+            {_id: "a_empty_subobject", a: {}},
+            {_id: "a_null", a: null},
+            {_id: "a_number", a: 4},
+            {_id: "a_subobject_b_null", a: {b: null}},
+            {_id: "a_subobject_b_undefined", a: {b: undefined}},
+            {_id: "a_undefined", a: undefined},
+            {_id: "no_a"}
+        ];
+        assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+
+        const count = coll.count({"a.b": {$eq: null}});
+        assert.eq(count, expected.length);
 
         const projectResults = coll.find({"a.b": {$eq: null}}, projectToOnlyADotB).toArray();
         assert(resultsEq(projectResults,
@@ -165,8 +188,11 @@ function testNullSemantics(coll) {
     // Test the semantics of the query {"a.b": {$ne: null}}.
     (function testDottedNotEqualsNull() {
         const noProjectResults = coll.find({"a.b": {$ne: null}}).toArray();
-        assert(resultsEq(noProjectResults, [{_id: "a_subobject_b_not_null", a: {b: "hi"}}]),
-               tojson(noProjectResults));
+        const expected = [{_id: "a_subobject_b_not_null", a: {b: "hi"}}];
+        assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+
+        const count = coll.count({"a.b": {$ne: null}});
+        assert.eq(count, expected.length);
 
         const projectResults = coll.find({"a.b": {$ne: null}}, projectToOnlyADotB).toArray();
         assert(resultsEq(projectResults, [{a: {b: "hi"}}]), tojson(projectResults));
@@ -183,6 +209,9 @@ function testNullSemantics(coll) {
         ];
         assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
 
+        const count = coll.count({"a.b": {$exists: false}});
+        assert.eq(count, expected.length);
+
         const projectResults = coll.find({"a.b": {$exists: false}}, projectToOnlyADotB).toArray();
         assert(resultsEq(projectResults, [{}, {a: {}}, {}, {}, {}]), tojson(projectResults));
     }());
@@ -202,6 +231,9 @@ function testNullSemantics(coll) {
                    `Expected no results for query ${tojson(elemMatchQuery)}, got ` +
                        tojson(noProjectResults));
 
+            const count = coll.count(elemMatchQuery);
+            assert.eq(count, 0);
+
             let projectResults = coll.find(elemMatchQuery, projectToOnlyA).toArray();
             assert(resultsEq(projectResults, []),
                    `Expected no results for query ${tojson(elemMatchQuery)}, got ` +
@@ -250,6 +282,9 @@ function testNullSemantics(coll) {
         ];
         assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
 
+        const count = coll.count({a: {$eq: null}});
+        assert.eq(count, expected.length);
+
         const projectResults = coll.find({a: {$eq: null}}, projectToOnlyA).toArray();
         assert(resultsEq(projectResults, extractAValues(expected)), tojson(projectResults));
     }());
@@ -274,6 +309,9 @@ function testNullSemantics(coll) {
         ];
         assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
 
+        const count = coll.count({a: {$ne: null}});
+        assert.eq(count, expected.length);
+
         const projectResults = coll.find({a: {$ne: null}}, projectToOnlyA).toArray();
         assert(resultsEq(projectResults, extractAValues(expected)), tojson(projectResults));
     }());
@@ -297,6 +335,8 @@ function testNullSemantics(coll) {
             {_id: "a_value_array_no_nulls", a: [1, "string", 4]},
         ];
         assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+        const count = coll.count({a: {$not: {$gte: null}}});
+        assert.eq(count, expected.length);
     }());
 
     // Test the semantics of the query {a: {$in: [null, <number>]}}.
@@ -314,6 +354,9 @@ function testNullSemantics(coll) {
 
         assert(resultsEq(noProjectResults, expected), noProjectResults);
 
+        const count = coll.count({a: {$in: [null, 75]}});
+        assert.eq(count, expected.length);
+
         const projectResults = coll.find(query, projectToOnlyA).toArray();
         assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
     }());
@@ -333,6 +376,9 @@ function testNullSemantics(coll) {
 
         assert(resultsEq(noProjectResults, expected), noProjectResults);
 
+        const count = coll.count({$or: [{a: null}, {a: 75}]});
+        assert.eq(count, expected.length);
+
         const projectResults = coll.find(query, projectToOnlyA).toArray();
         assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
     }());
@@ -360,6 +406,9 @@ function testNullSemantics(coll) {
 
         assert(resultsEq(noProjectResults, expected), noProjectResults);
 
+        const count = coll.count({a: {$nin: [null, 75]}});
+        assert.eq(count, expected.length);
+
         const projectResults = coll.find(query, projectToOnlyA).toArray();
         assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
     }());
@@ -395,6 +444,9 @@ function testNullSemantics(coll) {
 
         assert(resultsEq(noProjectResults, expected), noProjectResults);
 
+        const count = coll.count({a: {$nin: [null, []]}});
+        assert.eq(count, expected.length);
+
         const projectResults = coll.find(query, projectToOnlyA).toArray();
         assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
     }());
@@ -420,6 +472,9 @@ function testNullSemantics(coll) {
 
         assert(resultsEq(noProjectResults, expected), noProjectResults);
 
+        const count = coll.count(query);
+        assert.eq(count, expected.length);
+
         const projectResults = coll.find(query, projectToOnlyA).toArray();
         assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
     }());
@@ -435,6 +490,9 @@ function testNullSemantics(coll) {
         ];
         assert(resultsEq(noProjectResults, expectedEqualToNull), tojson(noProjectResults));
 
+        const count = coll.count({a: {$elemMatch: {$eq: null}}});
+        assert.eq(count, expectedEqualToNull.length);
+
         let projectResults = coll.find({a: {$elemMatch: {$eq: null}}}, projectToOnlyA).toArray();
         assert(resultsEq(projectResults, extractAValues(expectedEqualToNull)),
                tojson(projectResults));
@@ -467,25 +525,23 @@ function testNullSemantics(coll) {
     //    value for "b".
     (function testDottedEqualsNull() {
         const noProjectResults = coll.find({"a.b": {$eq: null}}).toArray();
-        assert(
-            resultsEq(noProjectResults,
-                      [
-                          {_id: "a_empty_subobject", a: {}},
-                          {_id: "a_null", a: null},
-                          {_id: "a_number", a: 4},
-                          {_id: "a_subobject_b_null", a: {b: null}},
-                          {_id: "a_subobject_b_undefined", a: {b: undefined}},
-                          {_id: "a_undefined", a: undefined},
-                          {_id: "no_a"},
-                          {
-                              _id: "a_object_array_all_b_nulls",
-                              a: [{b: null}, {b: undefined}, {b: null}, {}]
-                          },
-                          {_id: "a_object_array_some_b_nulls", a: [{b: null}, {b: 3}, {b: null}]},
-                          {_id: "a_object_array_some_b_undefined", a: [{b: undefined}, {b: 3}]},
-                          {_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]},
-                      ]),
-            tojson(noProjectResults));
+        const expected = [
+            {_id: "a_empty_subobject", a: {}},
+            {_id: "a_null", a: null},
+            {_id: "a_number", a: 4},
+            {_id: "a_subobject_b_null", a: {b: null}},
+            {_id: "a_subobject_b_undefined", a: {b: undefined}},
+            {_id: "a_undefined", a: undefined},
+            {_id: "no_a"},
+            {_id: "a_object_array_all_b_nulls", a: [{b: null}, {b: undefined}, {b: null}, {}]},
+            {_id: "a_object_array_some_b_nulls", a: [{b: null}, {b: 3}, {b: null}]},
+            {_id: "a_object_array_some_b_undefined", a: [{b: undefined}, {b: 3}]},
+            {_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]},
+        ];
+        assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+
+        const count = coll.count({"a.b": {$eq: null}});
+        assert.eq(count, expected.length);
 
         const projectResults = coll.find({"a.b": {$eq: null}}, projectToOnlyADotB).toArray();
         assert(resultsEq(projectResults,
@@ -508,18 +564,20 @@ function testNullSemantics(coll) {
     // Test the semantics of the query {"a.b": {$ne: null}}.
     (function testDottedNotEqualsNull() {
         const noProjectResults = coll.find({"a.b": {$ne: null}}).toArray();
-        assert(resultsEq(noProjectResults,
-                         [
-                             {_id: "a_subobject_b_not_null", a: {b: "hi"}},
-                             {_id: "a_double_array", a: [[]]},
-                             {_id: "a_empty_array", a: []},
-                             {_id: "a_object_array_no_b_nulls", a: [{b: 1}, {b: 3}, {b: "string"}]},
-                             {_id: "a_value_array_all_nulls", a: [null, null]},
-                             {_id: "a_value_array_no_nulls", a: [1, "string", 4]},
-                             {_id: "a_value_array_with_null", a: [1, "string", null, 4]},
-                             {_id: "a_value_array_with_undefined", a: [1, "string", undefined, 4]}
-                         ]),
-               tojson(noProjectResults));
+        const expected = [
+            {_id: "a_subobject_b_not_null", a: {b: "hi"}},
+            {_id: "a_double_array", a: [[]]},
+            {_id: "a_empty_array", a: []},
+            {_id: "a_object_array_no_b_nulls", a: [{b: 1}, {b: 3}, {b: "string"}]},
+            {_id: "a_value_array_all_nulls", a: [null, null]},
+            {_id: "a_value_array_no_nulls", a: [1, "string", 4]},
+            {_id: "a_value_array_with_null", a: [1, "string", null, 4]},
+            {_id: "a_value_array_with_undefined", a: [1, "string", undefined, 4]}
+        ];
+        assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+
+        const count = coll.count({"a.b": {$ne: null}});
+        assert.eq(count, expected.length);
 
         const projectResults = coll.find({"a.b": {$ne: null}}, projectToOnlyADotB).toArray();
         assert(resultsEq(projectResults,
@@ -554,6 +612,9 @@ function testNullSemantics(coll) {
             {_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]},
         ];
 
+        const count = coll.count({"a.b": {$in: [null, 75]}});
+        assert.eq(count, expected.length);
+
         assert(resultsEq(noProjectResults, expected), noProjectResults);
     }());
 
@@ -575,6 +636,9 @@ function testNullSemantics(coll) {
             {_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]},
         ];
 
+        const count = coll.count({$or: [{"a.b": null}, {"a.b": 75}]});
+        assert.eq(count, expected.length);
+
         assert(resultsEq(noProjectResults, expected), noProjectResults);
     }());
 
@@ -595,6 +659,9 @@ function testNullSemantics(coll) {
 
         assert(resultsEq(noProjectResults, expected), noProjectResults);
 
+        const count = coll.count({"a.b": {$nin: [null, 75]}});
+        assert.eq(count, expected.length);
+
         const projectResults = coll.find(query, projectToOnlyA).toArray();
         assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
     }());
@@ -613,6 +680,9 @@ function testNullSemantics(coll) {
             {_id: "a_value_array_with_undefined", a: [1, "string", undefined, 4]},
         ];
 
+        const count = coll.count({"a.b": {$nin: [null, /^str.*/]}});
+        assert.eq(count, expected.length);
+
         assert(resultsEq(noProjectResults, expected), noProjectResults);
 
         const projectResults = coll.find(query, projectToOnlyA).toArray();
@@ -625,6 +695,9 @@ function testNullSemantics(coll) {
         let results = coll.find({"a.b": {$elemMatch: {$eq: null}}}).toArray();
         assert(resultsEq(results, []), tojson(results));
 
+        const count = coll.count({"a.b": {$elemMatch: {$eq: null}}});
+        assert.eq(count, 0);
+
         results = coll.find({"a.b": {$elemMatch: {$ne: null}}}).toArray();
         assert(resultsEq(results, []), tojson(results));
     }());
@@ -642,6 +715,9 @@ function testNullSemantics(coll) {
         ];
         assert(resultsEq(noProjectResults, expectedEqualToNull), tojson(noProjectResults));
 
+        const count = coll.count({a: {$elemMatch: {b: {$eq: null}}}});
+        assert.eq(count, expectedEqualToNull.length);
+
         let projectResults =
             coll.find({a: {$elemMatch: {b: {$eq: null}}}}, projectToOnlyADotB).toArray();
         assert(resultsEq(projectResults,
diff --git a/src/mongo/db/matcher/expression_leaf.cpp b/src/mongo/db/matcher/expression_leaf.cpp
index 31157666e92..990f91c1796 100644
--- a/src/mongo/db/matcher/expression_leaf.cpp
+++ b/src/mongo/db/matcher/expression_leaf.cpp
@@ -434,6 +434,7 @@ std::unique_ptr<MatchExpression> InMatchExpression::shallowClone() const {
     }
     next->_hasNull = _hasNull;
     next->_hasEmptyArray = _hasEmptyArray;
+    next->_hasNonEmptyArrayOrObject = _hasNonEmptyArrayOrObject;
     next->_equalitySet = _equalitySet;
     next->_originalEqualityVector = _originalEqualityVector;
     next->_equalityStorage = _equalityStorage;
@@ -577,6 +578,8 @@ Status InMatchExpression::setEqualities(std::vector<BSONElement> equalities) {
             _hasNull = true;
         } else if (equality.type() == BSONType::Array && equality.Obj().isEmpty()) {
             _hasEmptyArray = true;
+        } else if (equality.type() == BSONType::Array || equality.type() == BSONType::Object) {
+            _hasNonEmptyArrayOrObject = true;
         }
     }
 
diff --git a/src/mongo/db/matcher/expression_leaf.h b/src/mongo/db/matcher/expression_leaf.h
index 46a80aa5e91..a00ed9e31f1 100644
--- a/src/mongo/db/matcher/expression_leaf.h
+++ b/src/mongo/db/matcher/expression_leaf.h
@@ -723,6 +723,14 @@ public:
         return _hasEmptyArray;
     }
 
+    bool hasNonEmptyArrayOrObject() const {
+        return _hasNonEmptyArrayOrObject;
+    }
+
+    bool hasNonScalarOrNonEmptyValues() const {
+        return hasNonEmptyArrayOrObject() || hasNull() || hasRegex();
+    }
+
     void acceptVisitor(MatchExpressionMutableVisitor* visitor) final {
         visitor->visit(this);
     }
@@ -748,6 +756,9 @@ private:
     // Whether or not '_equalities' has an empty array element in it.
     bool _hasEmptyArray = false;
 
+    // Whether or not '_equalities' has a non-empty array or object element in it.
+    bool _hasNonEmptyArrayOrObject = false;
+
     // Collator used to construct '_eltCmp';
     const CollatorInterface* _collator = nullptr;
 
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index 01e13b5f058..3c027bde140 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -108,13 +108,38 @@ Interval makeNullPointInterval(bool isHashed) {
     return isHashed ? kHashedNullInterval : IndexBoundsBuilder::kNullPointInterval;
 }
 
+/**
+ * This helper updates the query bounds tightness for the limited set of conditions where we see a
+ * null query that can be covered.
+ */
+void updateTightnessForNullQuery(const IndexEntry& index,
+                                 IndexBoundsBuilder::BoundsTightness* tightnessOut) {
+    if (index.sparse || index.type == IndexType::INDEX_HASHED) {
+        // Sparse indexes and hashed indexes require a FETCH stage with a filter for null queries.
+        *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+        return;
+    }
+
+    if (index.multikey) {
+        // If we have a simple equality null query and our index is multikey, we cannot cover the
+        // query. This is because null intervals are translated into the null and undefined point
+        // intervals, and the undefined point interval includes entries for []. In the case of a
+        // single null interval, [] should not match.
+        *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+        return;
+    }
+
+    // The query may be fully covered by the index if the projection allows it, since the case above
+    // about the empty array can only become an issue if there is an empty array present, which
+    // would mark the index as multikey.
+    *tightnessOut = IndexBoundsBuilder::EXACT_MAYBE_COVERED;
+}
+
 void makeNullEqualityBounds(const IndexEntry& index,
                             bool isHashed,
                             OrderedIntervalList* oil,
                             IndexBoundsBuilder::BoundsTightness* tightnessOut) {
-    // An equality to null predicate cannot be covered because the index does not distinguish
-    // between the lack of a value and the literal value null.
-    *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+    updateTightnessForNullQuery(index, tightnessOut);
 
     // There are two values that could possibly be equal to null in an index: undefined and null.
     oil->intervals.push_back(makeUndefinedPointInterval(isHashed));
@@ -254,7 +279,10 @@ bool IndexBoundsBuilder::canUseCoveredMatching(const MatchExpression* expr,
     IndexBoundsBuilder::BoundsTightness tightness;
     OrderedIntervalList oil;
     translate(expr, BSONElement{}, index, &oil, &tightness, /* iet::Builder */ nullptr);
-    return tightness >= IndexBoundsBuilder::INEXACT_COVERED;
+    // We have additional tightness values (MAYBE_COVERED), but we cannot generally cover those
+    // cases unless we have an appropriate projection.
+    return tightness == IndexBoundsBuilder::INEXACT_COVERED ||
+        tightness == IndexBoundsBuilder::EXACT;
 }
 
 // static
@@ -404,6 +432,61 @@ const Interval IndexBoundsBuilder::kNullPointInterval =
 const Interval IndexBoundsBuilder::kEmptyArrayPointInterval =
     IndexBoundsBuilder::makePointInterval(kEmptyArrayElementObj);
 
+bool detectIfEntireNullIntervalMatchesPredicate(const InMatchExpression* ime,
+                                                const IndexEntry& index) {
+    if (!ime->hasNull()) {
+        // This isn't a null query.
+        return false;
+    }
+
+    if (index.sparse || (IndexType::INDEX_HASHED == index.type)) {
+        // Sparse indexes and hashed indexes still require a FETCH stage with a filter for null
+        // queries.
+        return false;
+    }
+
+    // Given the context of having a null $in query with eligible indexes, we may be able to cover
+    // some combinations of intervals that we could not cover individually.
+    if (index.multikey) {
+        // If the path has multiple components and we have a multikey index, we still need a FETCH
+        // in order to defend against cases where we have a multikey index on "a". These documents
+        // will generate null index keys: {"a.b": null} and {a: [1,2,3]}. However, a query like
+        // {"a.b": {$in: [null, []]}} should not match {a: [1, 2, 3]}.
+        // TODO SERVER-71021: it may be possible to cover more cases here.
+        if (ime->fieldRef()->numParts() > 1) {
+            return false;
+        }
+
+        // We must have an equality to an empty array for this null query to be covered, otherwise,
+        // because we generate both null and undefined point intervals for a null query, and because
+        // a multikey index reuses the same entry for [] and undefined, we will not be able to cover
+        // the query.
+        if (!ime->hasEmptyArray()) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+void IndexBoundsBuilder::_mergeTightness(const BoundsTightness& tightness,
+                                         BoundsTightness& tightnessOut) {
+    // There is a special case where we may have a covered null query (EXACT_MAYBE_COVERED) and a
+    // regex with inexact bounds that doesn't need a FETCH (INEXACT_COVERED). In this case, we want
+    // to update the tightness to INEXACT_MAYBE_COVERED, to indicate that we need to check if the
+    // projection allows us to cover the query, but ensure that we will have a filter on the index
+    // if it turns out we can.
+    if (((tightness == BoundsTightness::EXACT_MAYBE_COVERED) &&
+         (tightnessOut == BoundsTightness::INEXACT_COVERED)) ||
+        ((tightness == BoundsTightness::INEXACT_COVERED) &&
+         (tightnessOut == BoundsTightness::EXACT_MAYBE_COVERED))) {
+        tightnessOut = BoundsTightness::INEXACT_MAYBE_COVERED;
+    } else if (tightness < tightnessOut) {
+        // Otherwise, fallback to picking the new tightness if it is looser than the old tightness.
+        tightnessOut = tightness;
+    }
+}
+
 void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr,
                                              const BSONElement& elt,
                                              const IndexEntry& index,
@@ -955,51 +1038,45 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr,
         });
 
         const InMatchExpression* ime = static_cast<const InMatchExpression*>(expr);
-
         *tightnessOut = IndexBoundsBuilder::EXACT;
 
         // Create our various intervals.
 
         IndexBoundsBuilder::BoundsTightness tightness;
-        bool arrayOrNullPresent = false;
+        // We check if the $in predicate satisfies conditions to be a covered null predicate on the
+        // basis of indexes, null intervals, and array intervals.
+        const bool entireNullIntervalMatchesPredicate =
+            detectIfEntireNullIntervalMatchesPredicate(ime, index);
         for (auto&& equality : ime->getEqualities()) {
-            translateEquality(equality, index, isHashed, oilOut, &tightness);
-            // The ordering invariant of oil has been violated by the call to translateEquality.
-            arrayOrNullPresent = arrayOrNullPresent || equality.type() == BSONType::jstNULL ||
-                equality.type() == BSONType::Array;
-            if (tightness != IndexBoundsBuilder::EXACT) {
-                *tightnessOut = tightness;
+            // First, we generate the bounds the same way that we would do for an individual
+            // equality. This will set tightness to the value it should be if this equality is being
+            // considered in isolation.
+            IndexBoundsBuilder::translateEquality(equality, index, isHashed, oilOut, &tightness);
+            if (entireNullIntervalMatchesPredicate &&
+                (BSONType::jstNULL == equality.type() ||
+                 (BSONType::Array == equality.type() && equality.Obj().isEmpty()))) {
+                // We may have a covered null query. In this case, we update both empty array and
+                // null interval tightness to EXACT_MAYBE_COVERED, as individually they would have a
+                // tightness of INEXACT_FETCH. However, we already know we will be able to cover
+                // these intervals together if we have appropriate projections. Note that any other
+                // intervals that cannot be covered may still require the query to use a FETCH.
+                tightness = IndexBoundsBuilder::EXACT_MAYBE_COVERED;
             }
+            IndexBoundsBuilder::_mergeTightness(tightness, *tightnessOut);
         }
 
         for (auto&& regex : ime->getRegexes()) {
             translateRegex(regex.get(), index, oilOut, &tightness);
-            if (tightness != IndexBoundsBuilder::EXACT) {
-                *tightnessOut = tightness;
-            }
-        }
-
-        if (ime->hasNull()) {
-            // A null index key does not always match a null query value so we must fetch the
-            // doc and run a full comparison.  See SERVER-4529.
-            // TODO: Do we already set the tightnessOut by calling translateEquality?
-            *tightnessOut = INEXACT_FETCH;
-        }
-
-        if (ime->hasEmptyArray()) {
-            // Empty arrays are indexed as undefined.
-            BSONObjBuilder undefinedBob;
-            undefinedBob.appendUndefined("");
-            oilOut->intervals.push_back(makePointInterval(undefinedBob.obj()));
-            *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+            IndexBoundsBuilder::_mergeTightness(tightness, *tightnessOut);
         }
 
         // Equalities are already sorted and deduped so unionize is unneccesary if no regexes
         // are present. Hashed indexes may also cause the bounds to be out-of-order.
-        // Arrays and nulls introduce multiple elements that neccesitate a sort and deduping.
-        if (!ime->getRegexes().empty() || index.type == IndexType::INDEX_HASHED ||
-            arrayOrNullPresent)
+        // Arrays and nulls introduce multiple elements that necessitate a sort and deduping.
+        if (ime->hasNonScalarOrNonEmptyValues() || index.type == IndexType::INDEX_HASHED) {
             unionize(oilOut);
+        }
+
     } else if (MatchExpression::GEO == expr->matchType()) {
         const GeoMatchExpression* gme = static_cast<const GeoMatchExpression*>(expr);
         if ("2dsphere" == elt.valueStringDataSafe()) {
@@ -1315,6 +1392,7 @@ void IndexBoundsBuilder::translateEquality(const BSONElement& data,
     }
 
     std::sort(oil->intervals.begin(), oil->intervals.end(), IntervalComparison);
+
     *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
 }
 
diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h
index b48228328f5..d1067caa561 100644
--- a/src/mongo/db/query/index_bounds_builder.h
+++ b/src/mongo/db/query/index_bounds_builder.h
@@ -67,16 +67,28 @@ public:
      * increasing tightness. These values are used when we need to do comparison between two
      * BoundsTightness values. Such comparisons can answer questions such as "Does predicate
      * X have tighter or looser bounds than predicate Y?".
+     *
+     * These enum values are ordered from loosest to tightest.
      */
     enum BoundsTightness {
         // Index bounds are inexact, and a fetch is required.
         INEXACT_FETCH = 0,
 
-        // Index bounds are inexact, but no fetch is required
-        INEXACT_COVERED = 1,
+        // Index bounds are inexact, and a fetch may be required depending on the projection.
+        // For example, a count $in query on null + a regex can be covered, but a find query with
+        // the same filter and no projection cannot.
+        INEXACT_MAYBE_COVERED = 1,
+
+        // Index bounds are exact, but a fetch may be required depending on the projection.
+        // For example, a find query on null may be covered, depending on which fields we project
+        // out.
+        EXACT_MAYBE_COVERED = 2,
+
+        // Index bounds are inexact, but no fetch is required.
+        INEXACT_COVERED = 3,
 
         // Index bounds are exact.
-        EXACT = 2
+        EXACT = 4
     };
 
     /**
@@ -301,6 +313,11 @@ private:
                                     OrderedIntervalList* oilOut,
                                     BoundsTightness* tightnessOut,
                                     interval_evaluation_tree::Builder* ietBuilder);
+
+    /**
+     * Helper method for merging interval tightness for $in expressions.
+     */
+    static void _mergeTightness(const BoundsTightness& tightness, BoundsTightness& tightnessOut);
 };
 
 }  // namespace mongo
diff --git a/src/mongo/db/query/index_bounds_builder_eq_null_test.cpp b/src/mongo/db/query/index_bounds_builder_eq_null_test.cpp
index af4cdf91303..31e17b6e2c8 100644
--- a/src/mongo/db/query/index_bounds_builder_eq_null_test.cpp
+++ b/src/mongo/db/query/index_bounds_builder_eq_null_test.cpp
@@ -48,7 +48,7 @@ void assertBoundsRepresentEqualsNull(const OrderedIntervalList& oil) {
                   oil.intervals[1].compare(Interval(fromjson("{'': null, '': null}"), true, true)));
 }
 
-TEST_F(IndexBoundsBuilderTest, TranslateExprEqualToNullIsInexactFetch) {
+TEST_F(IndexBoundsBuilderTest, TranslateExprEqualToNullIsExactMaybeCovered) {
     BSONObj keyPattern = BSON("a" << 1);
     BSONElement elt = keyPattern.firstElement();
     auto testIndex = buildSimpleIndexEntry(keyPattern);
@@ -65,11 +65,11 @@ TEST_F(IndexBoundsBuilderTest, TranslateExprEqualToNullIsInexactFetch) {
         oil.intervals[0].compare(Interval(fromjson("{'': undefined, '': undefined}"), true, true)));
     ASSERT_EQUALS(Interval::INTERVAL_EQUALS,
                   oil.intervals[1].compare(Interval(fromjson("{'': null, '': null}"), true, true)));
-    ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH);
+    ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT_MAYBE_COVERED);
     assertIET(inputParamIdMap, ietBuilder, elt, testIndex, oil);
 }
 
-TEST_F(IndexBoundsBuilderTest, TranslateEqualsToNullShouldBuildInexactBounds) {
+TEST_F(IndexBoundsBuilderTest, TranslateEqualsToNullShouldBuildExactMaybeCoveredBounds) {
     BSONObj indexPattern = BSON("a" << 1);
     auto testIndex = buildSimpleIndexEntry(indexPattern);
 
@@ -83,12 +83,12 @@ TEST_F(IndexBoundsBuilderTest, TranslateEqualsToNullShouldBuildInexactBounds) {
         expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness, &ietBuilder);
 
     ASSERT_EQUALS(oil.name, "a");
-    ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH);
+    ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT_MAYBE_COVERED);
     assertBoundsRepresentEqualsNull(oil);
     assertIET(inputParamIdMap, ietBuilder, indexPattern.firstElement(), testIndex, oil);
 }
 
-TEST_F(IndexBoundsBuilderTest, TranslateDottedEqualsToNullShouldBuildInexactBounds) {
+TEST_F(IndexBoundsBuilderTest, TranslateDottedEqualsToNullShouldBuildExactMaybeCoveredBounds) {
     BSONObj indexPattern = BSON("a.b" << 1);
     auto testIndex = buildSimpleIndexEntry(indexPattern);
 
@@ -102,7 +102,9 @@ TEST_F(IndexBoundsBuilderTest, TranslateDottedEqualsToNullShouldBuildInexactBoun
         expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness, &ietBuilder);
 
     ASSERT_EQUALS(oil.name, "a.b");
-    ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH);
+    // Depending on the query projection, this will either be converted to EXACT or to INEXACT_FETCH
+    // before we build an IXSCAN plan.
+    ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT_MAYBE_COVERED);
     assertBoundsRepresentEqualsNull(oil);
     assertIET(inputParamIdMap, ietBuilder, indexPattern.firstElement(), testIndex, oil);
 }
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index e05dfd8a5d1..dd75cbce48d 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -1114,47 +1114,13 @@ std::vector<std::unique_ptr<QuerySolutionNode>> QueryPlannerAccess::collapseEqui
 }
 
 /**
- * Returns true if this is a null query that can retrieve all the information it needs directly from
- * the index, and so does not need a FETCH stage on top of it. Returns false otherwise.
+ * This helper determines if a query can be covered depending on the query projection.
  */
-bool isCoveredNullQuery(const CanonicalQuery& query,
-                        MatchExpression* root,
-                        IndexTag* tag,
-                        const vector<IndexEntry>& indices,
-                        const QueryPlannerParams& params) {
-    // Sparse indexes and hashed indexes should not use this optimization as they will require a
-    // FETCH stage with a filter.
-    if (indices[tag->index].sparse || indices[tag->index].type == IndexType::INDEX_HASHED) {
-        return false;
-    }
-
-    // When the index is not multikey, we can support a query on an indexed field searching for null
-    // values. This optimization can only be done when the index is not multikey, otherwise empty
-    // arrays in the collection will be treated as null/undefined by the index. When the index is
-    // multikey, we can support a query searching for both null and empty array values.
-    const auto multikeyIndex = indices[tag->index].multikey;
-    if (root->matchType() == MatchExpression::MatchType::MATCH_IN) {
-        // Check that the query matches null values, if the index is not multikey, or null and empty
-        // array values, if the index is multikey. Note that the query may match values other than
-        // null (and empty array).
-        const auto node = static_cast<const InMatchExpression*>(root);
-        if (!node->hasNull() || (multikeyIndex && !node->hasEmptyArray())) {
-            return false;
-        }
-    } else if (ComparisonMatchExpressionBase::isEquality(root->matchType()) && !multikeyIndex) {
-        // Check that the query matches null values.
-        const auto node = static_cast<const ComparisonMatchExpressionBase*>(root);
-        if (node->getData().type() != BSONType::jstNULL) {
-            return false;
-        }
-    } else {
-        return false;
-    }
-
+bool projNeedsFetch(const CanonicalQuery& query, const QueryPlannerParams& params) {
     // If nothing is being projected, the query is fully covered without a fetch.
     // This is trivially true for a count query.
     if (params.options & QueryPlannerParams::Options::IS_COUNT) {
-        return true;
+        return false;
     }
 
     // This optimization can only be used for find when the index covers the projection completely.
@@ -1163,7 +1129,7 @@ bool isCoveredNullQuery(const CanonicalQuery& query,
     // in the multikey case). Hence, only find queries projecting _id are covered.
     auto proj = query.getProj();
     if (!proj) {
-        return false;
+        return true;
     }
 
     // We can cover projections on _id and generated fields and expressions depending only on _id.
@@ -1175,10 +1141,38 @@ bool isCoveredNullQuery(const CanonicalQuery& query,
         // Note that it is not possible to project onto dotted paths of _id here, since they may be
         // null or missing, and the index cannot differentiate between the two cases, so we would
         // still need a FETCH stage.
-        return projFields.size() == 1 && *projFields.begin() == "_id";
+        if (projFields.size() == 1 && *projFields.begin() == "_id") {
+            return false;
+        }
     }
 
-    return false;
+    return true;
+}
+
+/**
+ * This helper updates a MAYBE_COVERED query tightness to one of EXACT, INEXACT_COVERED, or
+ * INEXACT_FETCH, depending on whether we need a FETCH/filter to answer the query projection.
+ */
+void refineTightnessForMaybeCoveredQuery(const CanonicalQuery& query,
+                                         const QueryPlannerParams& params,
+                                         IndexBoundsBuilder::BoundsTightness& tightnessOut) {
+    // We need to refine the tightness in case we have a "MAYBE_COVERED" tightness bound which
+    // depends on the query's projection. We will not have information about the projection
+    // later on in order to make this determination, so we do it here.
+    const bool noFetchNeededForProj = !projNeedsFetch(query, params);
+    if (tightnessOut == IndexBoundsBuilder::EXACT_MAYBE_COVERED) {
+        if (noFetchNeededForProj) {
+            tightnessOut = IndexBoundsBuilder::EXACT;
+        } else {
+            tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+        }
+    } else if (tightnessOut == IndexBoundsBuilder::INEXACT_MAYBE_COVERED) {
+        if (noFetchNeededForProj) {
+            tightnessOut = IndexBoundsBuilder::INEXACT_COVERED;
+        } else {
+            tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+        }
+    }
 }
 
 bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
@@ -1222,11 +1216,6 @@ bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
         // If we're here, we now know that 'child' can use an index directly and the index is
         // over the child's field.
 
-        // We need to track if this is a covered null query so that we can have this information
-        // at hand when handling the filter on an indexed AND.
-        scanState.isCoveredNullQuery =
-            isCoveredNullQuery(query, child, scanState.ixtag, indices, params);
-
         // If 'child' is a NOT, then the tag we're interested in is on the NOT's
         // child node.
         if (MatchExpression::NOT == child->matchType()) {
@@ -1259,6 +1248,7 @@ bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
             verify(scanState.currentIndexNumber == scanState.ixtag->index);
             scanState.tightness = IndexBoundsBuilder::INEXACT_FETCH;
             mergeWithLeafNode(child, &scanState);
+            refineTightnessForMaybeCoveredQuery(query, params, scanState.tightness);
             handleFilter(&scanState);
         } else {
             if (nullptr != scanState.currentScan.get()) {
@@ -1278,6 +1268,7 @@ bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
                                                  &scanState.tightness,
                                                  scanState.getCurrentIETBuilder());
 
+            refineTightnessForMaybeCoveredQuery(query, params, scanState.tightness);
             handleFilter(&scanState);
         }
     }
@@ -1693,6 +1684,12 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::_buildIndexedDataAccess(
                 return soln;
             }
 
+            // We may be able to avoid adding an extra fetch stage even though the bounds are
+            // inexact, for instance if the query is counting null values on an indexed field
+            // without projecting that field. We therefore convert "MAYBE_COVERED" bounds into
+            // either EXACT or INEXACT, depending on the query projection.
+            refineTightnessForMaybeCoveredQuery(query, params, tightness);
+
             // If the bounds are exact, the set of documents that satisfy the predicate is
             // exactly equal to the set of documents that the scan provides.
             //
@@ -1700,11 +1697,7 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::_buildIndexedDataAccess(
             // superset of documents that satisfy the predicate, and we must check the
             // predicate.
 
-            // We may also be able to avoid adding an extra fetch stage even though the bounds are
-            // inexact because the query is counting null values on an indexed field without
-            // projecting that field.
-            if (tightness == IndexBoundsBuilder::EXACT ||
-                isCoveredNullQuery(query, root, tag, indices, params)) {
+            if (tightness == IndexBoundsBuilder::EXACT) {
                 return soln;
             } else if (tightness == IndexBoundsBuilder::INEXACT_COVERED &&
                        !indices[tag->index].multikey) {
@@ -1850,10 +1843,9 @@ void QueryPlannerAccess::handleFilterAnd(ScanBuildingState* scanState) {
         // should always be affixed as a filter. We keep 'curChild' in the $and
         // for affixing later.
         ++scanState->curChild;
-    } else if (scanState->tightness == IndexBoundsBuilder::EXACT || scanState->isCoveredNullQuery) {
-        // The tightness of the bounds is exact or we are dealing with a covered null query.
-        // Either way, we want to remove this child so that when control returns to handleIndexedAnd
-        // we know that we don't need it to create a FETCH stage.
+    } else if (scanState->tightness == IndexBoundsBuilder::EXACT) {
+        // The tightness of the bounds is exact. We want to remove this child so that when control
+        // returns to handleIndexedAnd we know that we don't need it to create a FETCH stage.
         root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild);
     } else if (scanState->tightness == IndexBoundsBuilder::INEXACT_COVERED &&
                (INDEX_TEXT == index.type || !index.multikey)) {
diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h
index 6ea44830415..bd9c856ffab 100644
--- a/src/mongo/db/query/planner_access.h
+++ b/src/mongo/db/query/planner_access.h
@@ -145,11 +145,9 @@ private:
     struct ScanBuildingState {
         ScanBuildingState(MatchExpression* theRoot,
                           const std::vector<IndexEntry>& indexList,
-                          bool inArrayOp,
-                          bool isCoveredNull = false)
+                          bool inArrayOp)
             : root(theRoot),
               inArrayOperator(inArrayOp),
-              isCoveredNullQuery(isCoveredNull),
               indices(indexList),
               currentScan(nullptr),
               curChild(0),
@@ -188,9 +186,6 @@ private:
         // Are we inside an array operator such as $elemMatch or $all?
         bool inArrayOperator;
 
-        // Is this a covered null query?
-        bool isCoveredNullQuery;
-
         // A list of relevant indices which 'root' may be tagged to use.
         const std::vector<IndexEntry>& indices;
author	Alya Berciu <alya.berciu@mongodb.com>	2022-11-08 16:37:07 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-11-08 17:13:58 +0000
commit	7e3381f8a26e6bb01d4715961f6babadff651de0 (patch)
tree	f19bceb6c5a3f711e349c7ab50ea4f5f4e5dc2f9
parent	d8329fbe00da54b5f8b0f60889364e7920968756 (diff)
download	mongo-7e3381f8a26e6bb01d4715961f6babadff651de0.tar.gz