summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Alya Berciu <alya.berciu@mongodb.com> 2022-11-08 16:37:07 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-11-08 17:13:58 +0000
commit7e3381f8a26e6bb01d4715961f6babadff651de0 (patch)
treef19bceb6c5a3f711e349c7ab50ea4f5f4e5dc2f9
parentd8329fbe00da54b5f8b0f60889364e7920968756 (diff)
downloadmongo-7e3381f8a26e6bb01d4715961f6babadff651de0.tar.gz
SERVER-70436 Handle covered $or null queries with regex
-rw-r--r--etc/backports_required_for_multiversion_tests.yml4
-rw-r--r--jstests/core/cover_null_queries.js322
-rw-r--r--jstests/core/null_query_semantics.js164
-rw-r--r--src/mongo/db/matcher/expression_leaf.cpp3
-rw-r--r--src/mongo/db/matcher/expression_leaf.h11
-rw-r--r--src/mongo/db/query/index_bounds_builder.cpp144
-rw-r--r--src/mongo/db/query/index_bounds_builder.h23
-rw-r--r--src/mongo/db/query/index_bounds_builder_eq_null_test.cpp14
-rw-r--r--src/mongo/db/query/planner_access.cpp100
-rw-r--r--src/mongo/db/query/planner_access.h7
10 files changed, 644 insertions, 148 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 2bcfc26a6e9..6b118574a7f 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -220,6 +220,8 @@ last-continuous:
ticket: SERVER-68728
- test_file: jstests/aggregation/expressions/switch_errors.js
ticket: SERVER-70190
+ - test_file: jstests/core/cover_null_queries.js
+ ticket: SERVER-70436
suites: null
last-lts:
all:
@@ -517,4 +519,6 @@ last-lts:
ticket: SERVER-68728
- test_file: jstests/aggregation/expressions/switch_errors.js
ticket: SERVER-70190
+ - test_file: jstests/core/cover_null_queries.js
+ ticket: SERVER-70436
suites: null
diff --git a/jstests/core/cover_null_queries.js b/jstests/core/cover_null_queries.js
index 50bf0b58b07..a05cb8ec3f8 100644
--- a/jstests/core/cover_null_queries.js
+++ b/jstests/core/cover_null_queries.js
@@ -3,7 +3,9 @@
* @tags: [
* assumes_unsharded_collection,
* requires_non_retryable_writes,
- * requires_fcv_52
+ * requires_fcv_60,
+ * # This test could produce unexpected explain output if additional indexes are created.
+ * assumes_no_implicit_index_creation,
* ]
*/
(function() {
@@ -57,12 +59,22 @@ function validateStages({cmdObj, expectedStages, isAgg}) {
*/
function validateFindCmdOutputAndPlan({filter, projection, expectedStages, expectedOutput}) {
const cmdObj = {find: coll.getName(), filter: filter, projection: projection};
+
+ // Compare index output with expected output.
if (expectedOutput) {
const res = assert.commandWorked(coll.runCommand(cmdObj));
const outputArray = new DBCommandCursor(coll.getDB(), res).toArray();
assert(arrayEq(expectedOutput, outputArray), outputArray);
}
+
+ // Validate explain.
validateStages({cmdObj, expectedStages});
+
+ // Verify that we get the same output as we expect without an index.
+ const noIndexCmdObj = Object.assign(cmdObj, {hint: {$natural: 1}});
+ const resNoIndex = assert.commandWorked(coll.runCommand(noIndexCmdObj));
+ const noIndexOutArr = new DBCommandCursor(coll.getDB(), resNoIndex).toArray();
+ assert(arrayEq(expectedOutput, noIndexOutArr), noIndexOutArr);
}
/**
@@ -71,10 +83,18 @@ function validateFindCmdOutputAndPlan({filter, projection, expectedStages, expec
* are present in the plan returned.
*/
function validateSimpleCountCmdOutputAndPlan({filter, expectedStages, expectedCount}) {
+ // Compare index output with expected output.
const cmdObj = {count: coll.getName(), query: filter};
const res = assert.commandWorked(coll.runCommand(cmdObj));
assert.eq(res.n, expectedCount);
+
+ // Validate explain.
validateStages({cmdObj, expectedStages});
+
+ // Verify that we get the same output with and without an index.
+ const noIndexCmdObj = Object.assign(cmdObj, {hint: {$natural: 1}});
+ const resNoIndex = assert.commandWorked(coll.runCommand(noIndexCmdObj));
+ assert.eq(resNoIndex.n, expectedCount);
}
/**
@@ -88,11 +108,33 @@ function validateCountAggCmdOutputAndPlan({filter, expectedStages, expectedCount
pipeline: pipeline || [{$match: filter}, {$count: "count"}],
cursor: {},
};
+
+ // Compare index output with expected output.
const cmdRes = assert.commandWorked(coll.runCommand(cmdObj));
const countRes = cmdRes.cursor.firstBatch;
assert.eq(countRes.length, 1, cmdRes);
assert.eq(countRes[0].count, expectedCount, countRes);
+
+ // Validate explain.
validateStages({cmdObj, expectedStages, isAgg: true});
+
+ // Verify that we get the same output as we expect without an index.
+ const noIndexCmdObj = Object.assign(cmdObj, {hint: {$natural: 1}});
+ const resNoIndex = assert.commandWorked(coll.runCommand(noIndexCmdObj));
+ const countResNoIndex = resNoIndex.cursor.firstBatch;
+ assert.eq(countResNoIndex.length, 1, cmdRes);
+ assert.eq(countResNoIndex[0].count, expectedCount, countRes);
+}
+
+/**
+ * Same as above, but uses a $group count.
+ */
+function validateGroupCountAggCmdOutputAndPlan({filter, expectedStages, expectedCount}) {
+ validateCountAggCmdOutputAndPlan({
+ expectedStages,
+ expectedCount,
+ pipeline: [{$match: filter}, {$group: {_id: 0, count: {$count: {}}}}]
+ });
}
assert.commandWorked(coll.createIndex({a: 1, _id: 1}));
@@ -152,7 +194,7 @@ validateFindCmdOutputAndPlan({
expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1},
});
-// Same as above, but special case for null and empty array predicate.
+// We can cover a $in with null and an empty array predicate.
validateFindCmdOutputAndPlan({
filter: {a: {$in: [null, []]}},
projection: {_id: 1},
@@ -165,6 +207,21 @@ validateSimpleCountCmdOutputAndPlan({
expectedStages: {"IXSCAN": 1, "FETCH": 0},
});
+// We cannot cover a $in with null and an array predicate.
+// TODO SERVER-71058: It should be possible to cover this case and the more general case of matching
+// an array on a non-multikey index.
+validateFindCmdOutputAndPlan({
+ filter: {a: {$in: [null, ["a"]]}},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+ filter: {a: {$in: [null, ["a"]]}},
+ expectedCount: 4,
+ expectedStages: {"IXSCAN": 1, "FETCH": 1, "COUNT": 1},
+});
+
// Verify that a more complex projection that only relies on the _id field does not need a FETCH.
validateFindCmdOutputAndPlan({
filter: {a: null},
@@ -691,4 +748,265 @@ validateCountAggCmdOutputAndPlan({
expectedCount: 4,
expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0},
});
+
+// Validate that we can use the optimization when we have regex without array elements in a $in or
+// $or. See SERVER-70436 for more details.
+coll.drop();
+
+assert.commandWorked(coll.insertMany([
+ {_id: 1, a: '123456'},
+ {_id: 2, a: '1234567'},
+ {_id: 3, a: ' 12345678'},
+ {_id: 4, a: '444456'},
+ {_id: 5, a: ''},
+ {_id: 6, a: null},
+ {_id: 7},
+]));
+
+assert.commandWorked(coll.createIndex({a: 1, _id: 1}));
+
+// TODO SERVER-70998: Can apply optimization in case without regex; however, we still can't use a
+// COUNT_SCAN in this case.
+validateFindCmdOutputAndPlan({
+ filter: {$or: [{a: null}, {a: ""}]},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 5}, {_id: 6}, {_id: 7}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+ filter: {$or: [{a: null}, {a: ""}]},
+ expectedCount: 3,
+ expectedStages: {"COUNT": 1, "IXSCAN": 1, "FETCH": 0},
+});
+validateGroupCountAggCmdOutputAndPlan({
+ filter: {$or: [{a: null}, {a: ""}]},
+ expectedCount: 3,
+ expectedStages: {"IXSCAN": 1, "FETCH": 0},
+});
+
+// Can still apply optimization when we have regex.
+validateFindCmdOutputAndPlan({
+ filter: {$or: [{a: null}, {a: {$regex: "^$"}}]},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 5}, {_id: 6}, {_id: 7}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+ filter: {$or: [{a: null}, {a: {$regex: "^$"}}]},
+ expectedCount: 3,
+ expectedStages: {"IXSCAN": 1, "FETCH": 0, "COUNT": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+ filter: {$or: [{a: null}, {a: {$regex: "^$"}}]},
+ expectedCount: 3,
+ expectedStages: {"IXSCAN": 1, "FETCH": 0},
+});
+
+// Now test case with a multikey index. We can't leverage the optimization here.
+assert.commandWorked(coll.insert({_id: 8, a: [1, 2, 3]}));
+assert.commandWorked(coll.insert({_id: 9, a: []}));
+
+validateFindCmdOutputAndPlan({
+ filter: {$or: [{a: null}, {a: []}, {a: {$regex: "^$"}}]},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 5}, {_id: 6}, {_id: 7}, {_id: 9}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+ filter: {$or: [{a: null}, {a: []}, {a: {$regex: "^$"}}]},
+ expectedCount: 4,
+ expectedStages: {"COUNT": 1, "IXSCAN": 1, "FETCH": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+ filter: {$or: [{a: null}, {a: []}, {a: {$regex: "^$"}}]},
+ expectedCount: 4,
+ expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
+
+// We also shouldn't cover queries on multikey indexes where $in includes an array, as we will still
+// need a filter after the IXSCAN to correctly return only the matching documents.
+validateFindCmdOutputAndPlan({
+ filter: {$or: [{a: null}, {a: []}, {a: [2]}]},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 6}, {_id: 7}, {_id: 9}],
+ expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+ filter: {$or: [{a: null}, {a: []}, {a: [2]}]},
+ expectedCount: 3,
+ expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+ filter: {$or: [{a: null}, {a: []}, {a: [2]}]},
+ expectedCount: 3,
+ expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
+
+// Validate that when we have a dotted path, we return the correct results for null queries.
+coll.drop();
+assert.commandWorked(coll.insertMany([
+ {_id: 1, a: 1},
+ {_id: 2, a: null},
+ {_id: 3},
+ {_id: 4, a: {b: 1}},
+ {_id: 5, a: {b: null}},
+ {_id: 6, a: {c: 1}},
+]));
+assert.commandWorked(coll.createIndex({"a.b": 1, _id: 1}));
+
+validateFindCmdOutputAndPlan({
+ filter: {"a.b": null},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 1}, {_id: 2}, {_id: 3}, {_id: 5}, {_id: 6}],
+ expectedStages: {"IXSCAN": 1, "PROJECTION_COVERED": 1, "FETCH": 0},
+});
+validateSimpleCountCmdOutputAndPlan({
+ filter: {"a.b": null},
+ expectedCount: 5,
+ expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0},
+});
+validateGroupCountAggCmdOutputAndPlan({
+ filter: {"a.b": null},
+ expectedCount: 5,
+ expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0},
+});
+
+validateFindCmdOutputAndPlan({
+ filter: {a: {b: null}},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 5}],
+ expectedStages: {"COLLSCAN": 1, "PROJECTION_SIMPLE": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+ filter: {a: {b: null}},
+ expectedCount: 1,
+ expectedStages: {"COLLSCAN": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+ filter: {a: {b: null}},
+ expectedCount: 1,
+ expectedStages: {"COLLSCAN": 1},
+});
+
+// Still need fetch if we don't have a sufficiently restrictive projection.
+validateFindCmdOutputAndPlan({
+ filter: {"a.b": null},
+ projection: {_id: 1, a: 1},
+ expectedOutput: [
+ {_id: 1, a: 1},
+ {_id: 2, a: null},
+ {_id: 3},
+ {_id: 5, a: {b: null}},
+ {_id: 6, a: {c: 1}},
+ ],
+ expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
+
+// Make index multikey, and test case where field b is nested in an array.
+assert.commandWorked(coll.insertMany([
+ {_id: 7, a: [{b: null}]},
+ {_id: 8, a: [{b: []}]},
+ {_id: 9, a: [{b: [1, 2, 3]}]},
+ {_id: 10, a: [{b: 123}]},
+ {_id: 11, a: [{c: 123}]},
+ {_id: 12, a: []},
+ {_id: 13, a: [{}]},
+ {_id: 14, a: [1, 2, 3]},
+ {_id: 15, a: [{b: 1}, {c: 2}, {b: 3}]},
+ {_id: 16, a: [null]},
+]));
+
+validateFindCmdOutputAndPlan({
+ filter: {"a.b": null},
+ projection: {_id: 1},
+ expectedOutput: [
+ {_id: 1},
+ {_id: 2},
+ {_id: 3},
+ {_id: 5},
+ {_id: 6},
+ {_id: 7},
+ {_id: 11},
+ {_id: 13},
+ {_id: 15}
+ ],
+ expectedStages: {"IXSCAN": 1, "PROJECTION_SIMPLE": 1, "FETCH": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+ filter: {"a.b": null},
+ expectedCount: 9,
+ expectedStages: {"COUNT": 1, "IXSCAN": 1, "FETCH": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+ filter: {"a.b": null},
+ expectedCount: 9,
+ expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
+
+validateFindCmdOutputAndPlan({
+ filter: {a: {b: null}},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 5}, {_id: 7}],
+ expectedStages: {
+ "COLLSCAN": 1,
+ "PROJECTION_SIMPLE": 1,
+ },
+});
+validateSimpleCountCmdOutputAndPlan({
+ filter: {a: {b: null}},
+ expectedCount: 2,
+ expectedStages: {"COLLSCAN": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+ filter: {a: {b: null}},
+ expectedCount: 2,
+ expectedStages: {"COLLSCAN": 1},
+});
+
+validateFindCmdOutputAndPlan({
+ filter: {a: [{b: null}]},
+ projection: {_id: 1},
+ expectedOutput: [{_id: 7}],
+ expectedStages: {"COLLSCAN": 1, "PROJECTION_SIMPLE": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+ filter: {a: [{b: null}]},
+ expectedCount: 1,
+ expectedStages: {"COLLSCAN": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+ filter: {a: [{b: null}]},
+ expectedCount: 1,
+ expectedStages: {"COLLSCAN": 1},
+});
+
+// We still need a FETCH for composite paths, because both {a: [1,2,3]} and {"a.b": null} generate
+// null index keys, but the former should not match the predicate below.
+validateFindCmdOutputAndPlan({
+ filter: {"a.b": {$in: [null, []]}},
+ projection: {_id: 1},
+ expectedOutput: [
+ {_id: 1},
+ {_id: 2},
+ {_id: 3},
+ {_id: 5},
+ {_id: 6},
+ {_id: 7},
+ {_id: 8},
+ {_id: 11},
+ {_id: 13},
+ {_id: 15}
+ ],
+ expectedStages: {"IXSCAN": 1, "PROJECTION_SIMPLE": 1, "FETCH": 1},
+});
+validateSimpleCountCmdOutputAndPlan({
+ filter: {"a.b": {$in: [null, []]}},
+ expectedCount: 10,
+ expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
+validateGroupCountAggCmdOutputAndPlan({
+ filter: {"a.b": {$in: [null, []]}},
+ expectedCount: 10,
+ expectedStages: {"IXSCAN": 1, "FETCH": 1},
+});
})();
diff --git a/jstests/core/null_query_semantics.js b/jstests/core/null_query_semantics.js
index 1ff14cc710c..aa9042c5ac5 100644
--- a/jstests/core/null_query_semantics.js
+++ b/jstests/core/null_query_semantics.js
@@ -41,6 +41,9 @@ function testNullSemantics(coll) {
[{_id: "a_null", a: null}, {_id: "a_undefined", a: undefined}, {_id: "no_a"}];
assert(resultsEq(expected, noProjectResults), tojson(noProjectResults));
+ const count = coll.count({a: {$eq: null}});
+ assert.eq(count, expected.length);
+
const projectResults = coll.find({a: {$eq: null}}, projectToOnlyA).toArray();
assert(resultsEq(projectResults, extractAValues(expected)), tojson(projectResults));
}());
@@ -57,6 +60,9 @@ function testNullSemantics(coll) {
];
assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+ const count = coll.count({a: {$ne: null}});
+ assert.eq(count, expected.length);
+
const projectResults = coll.find({a: {$ne: null}}, projectToOnlyA).toArray();
assert(resultsEq(projectResults, extractAValues(expected)), tojson(projectResults));
}());
@@ -72,6 +78,9 @@ function testNullSemantics(coll) {
{_id: "no_a"},
];
+ const count = coll.count(query);
+ assert.eq(count, expected.length);
+
assert(resultsEq(noProjectResults, expected), noProjectResults);
const projectResults = coll.find(query, projectToOnlyA).toArray();
@@ -89,6 +98,9 @@ function testNullSemantics(coll) {
{_id: "no_a"},
];
+ const count = coll.count(query);
+ assert.eq(count, expected.length);
+
assert(resultsEq(noProjectResults, expected), noProjectResults);
const projectResults = coll.find(query, projectToOnlyA).toArray();
@@ -106,6 +118,9 @@ function testNullSemantics(coll) {
{_id: "a_subobject_b_undefined", a: {b: undefined}},
];
+ const count = coll.count(query);
+ assert.eq(count, expected.length);
+
assert(resultsEq(noProjectResults, expected), noProjectResults);
const projectResults = coll.find(query, projectToOnlyA).toArray();
@@ -126,6 +141,9 @@ function testNullSemantics(coll) {
];
assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+ const count = coll.count(query);
+ assert.eq(count, expected.length);
+
const projectResults = coll.find(query, projectToOnlyA).toArray();
assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
}());
@@ -137,6 +155,9 @@ function testNullSemantics(coll) {
];
assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+ const count = coll.count({a: {$exists: false}});
+ assert.eq(count, expected.length);
+
const projectResults = coll.find({a: {$exists: false}}, projectToOnlyA).toArray();
assert(resultsEq(projectResults, extractAValues(expected)), tojson(projectResults));
}());
@@ -144,17 +165,19 @@ function testNullSemantics(coll) {
// Test the semantics of the query {"a.b": {$eq: null}}.
(function testDottedEqualsNull() {
const noProjectResults = coll.find({"a.b": {$eq: null}}).toArray();
- assert(resultsEq(noProjectResults,
- [
- {_id: "a_empty_subobject", a: {}},
- {_id: "a_null", a: null},
- {_id: "a_number", a: 4},
- {_id: "a_subobject_b_null", a: {b: null}},
- {_id: "a_subobject_b_undefined", a: {b: undefined}},
- {_id: "a_undefined", a: undefined},
- {_id: "no_a"}
- ]),
- tojson(noProjectResults));
+ const expected = [
+ {_id: "a_empty_subobject", a: {}},
+ {_id: "a_null", a: null},
+ {_id: "a_number", a: 4},
+ {_id: "a_subobject_b_null", a: {b: null}},
+ {_id: "a_subobject_b_undefined", a: {b: undefined}},
+ {_id: "a_undefined", a: undefined},
+ {_id: "no_a"}
+ ];
+ assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+
+ const count = coll.count({"a.b": {$eq: null}});
+ assert.eq(count, expected.length);
const projectResults = coll.find({"a.b": {$eq: null}}, projectToOnlyADotB).toArray();
assert(resultsEq(projectResults,
@@ -165,8 +188,11 @@ function testNullSemantics(coll) {
// Test the semantics of the query {"a.b": {$ne: null}}.
(function testDottedNotEqualsNull() {
const noProjectResults = coll.find({"a.b": {$ne: null}}).toArray();
- assert(resultsEq(noProjectResults, [{_id: "a_subobject_b_not_null", a: {b: "hi"}}]),
- tojson(noProjectResults));
+ const expected = [{_id: "a_subobject_b_not_null", a: {b: "hi"}}];
+ assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+
+ const count = coll.count({"a.b": {$ne: null}});
+ assert.eq(count, expected.length);
const projectResults = coll.find({"a.b": {$ne: null}}, projectToOnlyADotB).toArray();
assert(resultsEq(projectResults, [{a: {b: "hi"}}]), tojson(projectResults));
@@ -183,6 +209,9 @@ function testNullSemantics(coll) {
];
assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+ const count = coll.count({"a.b": {$exists: false}});
+ assert.eq(count, expected.length);
+
const projectResults = coll.find({"a.b": {$exists: false}}, projectToOnlyADotB).toArray();
assert(resultsEq(projectResults, [{}, {a: {}}, {}, {}, {}]), tojson(projectResults));
}());
@@ -202,6 +231,9 @@ function testNullSemantics(coll) {
`Expected no results for query ${tojson(elemMatchQuery)}, got ` +
tojson(noProjectResults));
+ const count = coll.count(elemMatchQuery);
+ assert.eq(count, 0);
+
let projectResults = coll.find(elemMatchQuery, projectToOnlyA).toArray();
assert(resultsEq(projectResults, []),
`Expected no results for query ${tojson(elemMatchQuery)}, got ` +
@@ -250,6 +282,9 @@ function testNullSemantics(coll) {
];
assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+ const count = coll.count({a: {$eq: null}});
+ assert.eq(count, expected.length);
+
const projectResults = coll.find({a: {$eq: null}}, projectToOnlyA).toArray();
assert(resultsEq(projectResults, extractAValues(expected)), tojson(projectResults));
}());
@@ -274,6 +309,9 @@ function testNullSemantics(coll) {
];
assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+ const count = coll.count({a: {$ne: null}});
+ assert.eq(count, expected.length);
+
const projectResults = coll.find({a: {$ne: null}}, projectToOnlyA).toArray();
assert(resultsEq(projectResults, extractAValues(expected)), tojson(projectResults));
}());
@@ -297,6 +335,8 @@ function testNullSemantics(coll) {
{_id: "a_value_array_no_nulls", a: [1, "string", 4]},
];
assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+ const count = coll.count({a: {$not: {$gte: null}}});
+ assert.eq(count, expected.length);
}());
// Test the semantics of the query {a: {$in: [null, <number>]}}.
@@ -314,6 +354,9 @@ function testNullSemantics(coll) {
assert(resultsEq(noProjectResults, expected), noProjectResults);
+ const count = coll.count({a: {$in: [null, 75]}});
+ assert.eq(count, expected.length);
+
const projectResults = coll.find(query, projectToOnlyA).toArray();
assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
}());
@@ -333,6 +376,9 @@ function testNullSemantics(coll) {
assert(resultsEq(noProjectResults, expected), noProjectResults);
+ const count = coll.count({$or: [{a: null}, {a: 75}]});
+ assert.eq(count, expected.length);
+
const projectResults = coll.find(query, projectToOnlyA).toArray();
assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
}());
@@ -360,6 +406,9 @@ function testNullSemantics(coll) {
assert(resultsEq(noProjectResults, expected), noProjectResults);
+ const count = coll.count({a: {$nin: [null, 75]}});
+ assert.eq(count, expected.length);
+
const projectResults = coll.find(query, projectToOnlyA).toArray();
assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
}());
@@ -395,6 +444,9 @@ function testNullSemantics(coll) {
assert(resultsEq(noProjectResults, expected), noProjectResults);
+ const count = coll.count({a: {$nin: [null, []]}});
+ assert.eq(count, expected.length);
+
const projectResults = coll.find(query, projectToOnlyA).toArray();
assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
}());
@@ -420,6 +472,9 @@ function testNullSemantics(coll) {
assert(resultsEq(noProjectResults, expected), noProjectResults);
+ const count = coll.count(query);
+ assert.eq(count, expected.length);
+
const projectResults = coll.find(query, projectToOnlyA).toArray();
assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
}());
@@ -435,6 +490,9 @@ function testNullSemantics(coll) {
];
assert(resultsEq(noProjectResults, expectedEqualToNull), tojson(noProjectResults));
+ const count = coll.count({a: {$elemMatch: {$eq: null}}});
+ assert.eq(count, expectedEqualToNull.length);
+
let projectResults = coll.find({a: {$elemMatch: {$eq: null}}}, projectToOnlyA).toArray();
assert(resultsEq(projectResults, extractAValues(expectedEqualToNull)),
tojson(projectResults));
@@ -467,25 +525,23 @@ function testNullSemantics(coll) {
// value for "b".
(function testDottedEqualsNull() {
const noProjectResults = coll.find({"a.b": {$eq: null}}).toArray();
- assert(
- resultsEq(noProjectResults,
- [
- {_id: "a_empty_subobject", a: {}},
- {_id: "a_null", a: null},
- {_id: "a_number", a: 4},
- {_id: "a_subobject_b_null", a: {b: null}},
- {_id: "a_subobject_b_undefined", a: {b: undefined}},
- {_id: "a_undefined", a: undefined},
- {_id: "no_a"},
- {
- _id: "a_object_array_all_b_nulls",
- a: [{b: null}, {b: undefined}, {b: null}, {}]
- },
- {_id: "a_object_array_some_b_nulls", a: [{b: null}, {b: 3}, {b: null}]},
- {_id: "a_object_array_some_b_undefined", a: [{b: undefined}, {b: 3}]},
- {_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]},
- ]),
- tojson(noProjectResults));
+ const expected = [
+ {_id: "a_empty_subobject", a: {}},
+ {_id: "a_null", a: null},
+ {_id: "a_number", a: 4},
+ {_id: "a_subobject_b_null", a: {b: null}},
+ {_id: "a_subobject_b_undefined", a: {b: undefined}},
+ {_id: "a_undefined", a: undefined},
+ {_id: "no_a"},
+ {_id: "a_object_array_all_b_nulls", a: [{b: null}, {b: undefined}, {b: null}, {}]},
+ {_id: "a_object_array_some_b_nulls", a: [{b: null}, {b: 3}, {b: null}]},
+ {_id: "a_object_array_some_b_undefined", a: [{b: undefined}, {b: 3}]},
+ {_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]},
+ ];
+ assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+
+ const count = coll.count({"a.b": {$eq: null}});
+ assert.eq(count, expected.length);
const projectResults = coll.find({"a.b": {$eq: null}}, projectToOnlyADotB).toArray();
assert(resultsEq(projectResults,
@@ -508,18 +564,20 @@ function testNullSemantics(coll) {
// Test the semantics of the query {"a.b": {$ne: null}}.
(function testDottedNotEqualsNull() {
const noProjectResults = coll.find({"a.b": {$ne: null}}).toArray();
- assert(resultsEq(noProjectResults,
- [
- {_id: "a_subobject_b_not_null", a: {b: "hi"}},
- {_id: "a_double_array", a: [[]]},
- {_id: "a_empty_array", a: []},
- {_id: "a_object_array_no_b_nulls", a: [{b: 1}, {b: 3}, {b: "string"}]},
- {_id: "a_value_array_all_nulls", a: [null, null]},
- {_id: "a_value_array_no_nulls", a: [1, "string", 4]},
- {_id: "a_value_array_with_null", a: [1, "string", null, 4]},
- {_id: "a_value_array_with_undefined", a: [1, "string", undefined, 4]}
- ]),
- tojson(noProjectResults));
+ const expected = [
+ {_id: "a_subobject_b_not_null", a: {b: "hi"}},
+ {_id: "a_double_array", a: [[]]},
+ {_id: "a_empty_array", a: []},
+ {_id: "a_object_array_no_b_nulls", a: [{b: 1}, {b: 3}, {b: "string"}]},
+ {_id: "a_value_array_all_nulls", a: [null, null]},
+ {_id: "a_value_array_no_nulls", a: [1, "string", 4]},
+ {_id: "a_value_array_with_null", a: [1, "string", null, 4]},
+ {_id: "a_value_array_with_undefined", a: [1, "string", undefined, 4]}
+ ];
+ assert(resultsEq(noProjectResults, expected), tojson(noProjectResults));
+
+ const count = coll.count({"a.b": {$ne: null}});
+ assert.eq(count, expected.length);
const projectResults = coll.find({"a.b": {$ne: null}}, projectToOnlyADotB).toArray();
assert(resultsEq(projectResults,
@@ -554,6 +612,9 @@ function testNullSemantics(coll) {
{_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]},
];
+ const count = coll.count({"a.b": {$in: [null, 75]}});
+ assert.eq(count, expected.length);
+
assert(resultsEq(noProjectResults, expected), noProjectResults);
}());
@@ -575,6 +636,9 @@ function testNullSemantics(coll) {
{_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]},
];
+ const count = coll.count({$or: [{"a.b": null}, {"a.b": 75}]});
+ assert.eq(count, expected.length);
+
assert(resultsEq(noProjectResults, expected), noProjectResults);
}());
@@ -595,6 +659,9 @@ function testNullSemantics(coll) {
assert(resultsEq(noProjectResults, expected), noProjectResults);
+ const count = coll.count({"a.b": {$nin: [null, 75]}});
+ assert.eq(count, expected.length);
+
const projectResults = coll.find(query, projectToOnlyA).toArray();
assert(resultsEq(projectResults, extractAValues(expected)), projectResults);
}());
@@ -613,6 +680,9 @@ function testNullSemantics(coll) {
{_id: "a_value_array_with_undefined", a: [1, "string", undefined, 4]},
];
+ const count = coll.count({"a.b": {$nin: [null, /^str.*/]}});
+ assert.eq(count, expected.length);
+
assert(resultsEq(noProjectResults, expected), noProjectResults);
const projectResults = coll.find(query, projectToOnlyA).toArray();
@@ -625,6 +695,9 @@ function testNullSemantics(coll) {
let results = coll.find({"a.b": {$elemMatch: {$eq: null}}}).toArray();
assert(resultsEq(results, []), tojson(results));
+ const count = coll.count({"a.b": {$elemMatch: {$eq: null}}});
+ assert.eq(count, 0);
+
results = coll.find({"a.b": {$elemMatch: {$ne: null}}}).toArray();
assert(resultsEq(results, []), tojson(results));
}());
@@ -642,6 +715,9 @@ function testNullSemantics(coll) {
];
assert(resultsEq(noProjectResults, expectedEqualToNull), tojson(noProjectResults));
+ const count = coll.count({a: {$elemMatch: {b: {$eq: null}}}});
+ assert.eq(count, expectedEqualToNull.length);
+
let projectResults =
coll.find({a: {$elemMatch: {b: {$eq: null}}}}, projectToOnlyADotB).toArray();
assert(resultsEq(projectResults,
diff --git a/src/mongo/db/matcher/expression_leaf.cpp b/src/mongo/db/matcher/expression_leaf.cpp
index 31157666e92..990f91c1796 100644
--- a/src/mongo/db/matcher/expression_leaf.cpp
+++ b/src/mongo/db/matcher/expression_leaf.cpp
@@ -434,6 +434,7 @@ std::unique_ptr<MatchExpression> InMatchExpression::shallowClone() const {
}
next->_hasNull = _hasNull;
next->_hasEmptyArray = _hasEmptyArray;
+ next->_hasNonEmptyArrayOrObject = _hasNonEmptyArrayOrObject;
next->_equalitySet = _equalitySet;
next->_originalEqualityVector = _originalEqualityVector;
next->_equalityStorage = _equalityStorage;
@@ -577,6 +578,8 @@ Status InMatchExpression::setEqualities(std::vector<BSONElement> equalities) {
_hasNull = true;
} else if (equality.type() == BSONType::Array && equality.Obj().isEmpty()) {
_hasEmptyArray = true;
+ } else if (equality.type() == BSONType::Array || equality.type() == BSONType::Object) {
+ _hasNonEmptyArrayOrObject = true;
}
}
diff --git a/src/mongo/db/matcher/expression_leaf.h b/src/mongo/db/matcher/expression_leaf.h
index 46a80aa5e91..a00ed9e31f1 100644
--- a/src/mongo/db/matcher/expression_leaf.h
+++ b/src/mongo/db/matcher/expression_leaf.h
@@ -723,6 +723,14 @@ public:
return _hasEmptyArray;
}
+ bool hasNonEmptyArrayOrObject() const {
+ return _hasNonEmptyArrayOrObject;
+ }
+
+ bool hasNonScalarOrNonEmptyValues() const {
+ return hasNonEmptyArrayOrObject() || hasNull() || hasRegex();
+ }
+
void acceptVisitor(MatchExpressionMutableVisitor* visitor) final {
visitor->visit(this);
}
@@ -748,6 +756,9 @@ private:
// Whether or not '_equalities' has an empty array element in it.
bool _hasEmptyArray = false;
+ // Whether or not '_equalities' has a non-empty array or object element in it.
+ bool _hasNonEmptyArrayOrObject = false;
+
// Collator used to construct '_eltCmp';
const CollatorInterface* _collator = nullptr;
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index 01e13b5f058..3c027bde140 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -108,13 +108,38 @@ Interval makeNullPointInterval(bool isHashed) {
return isHashed ? kHashedNullInterval : IndexBoundsBuilder::kNullPointInterval;
}
+/**
+ * This helper updates the query bounds tightness for the limited set of conditions where we see a
+ * null query that can be covered.
+ */
+void updateTightnessForNullQuery(const IndexEntry& index,
+ IndexBoundsBuilder::BoundsTightness* tightnessOut) {
+ if (index.sparse || index.type == IndexType::INDEX_HASHED) {
+ // Sparse indexes and hashed indexes require a FETCH stage with a filter for null queries.
+ *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+ return;
+ }
+
+ if (index.multikey) {
+ // If we have a simple equality null query and our index is multikey, we cannot cover the
+ // query. This is because null intervals are translated into the null and undefined point
+ // intervals, and the undefined point interval includes entries for []. In the case of a
+ // single null interval, [] should not match.
+ *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+ return;
+ }
+
+ // The query may be fully covered by the index if the projection allows it, since the case above
+ // about the empty array can only become an issue if there is an empty array present, which
+ // would mark the index as multikey.
+ *tightnessOut = IndexBoundsBuilder::EXACT_MAYBE_COVERED;
+}
+
void makeNullEqualityBounds(const IndexEntry& index,
bool isHashed,
OrderedIntervalList* oil,
IndexBoundsBuilder::BoundsTightness* tightnessOut) {
- // An equality to null predicate cannot be covered because the index does not distinguish
- // between the lack of a value and the literal value null.
- *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+ updateTightnessForNullQuery(index, tightnessOut);
// There are two values that could possibly be equal to null in an index: undefined and null.
oil->intervals.push_back(makeUndefinedPointInterval(isHashed));
@@ -254,7 +279,10 @@ bool IndexBoundsBuilder::canUseCoveredMatching(const MatchExpression* expr,
IndexBoundsBuilder::BoundsTightness tightness;
OrderedIntervalList oil;
translate(expr, BSONElement{}, index, &oil, &tightness, /* iet::Builder */ nullptr);
- return tightness >= IndexBoundsBuilder::INEXACT_COVERED;
+ // We have additional tightness values (MAYBE_COVERED), but we cannot generally cover those
+ // cases unless we have an appropriate projection.
+ return tightness == IndexBoundsBuilder::INEXACT_COVERED ||
+ tightness == IndexBoundsBuilder::EXACT;
}
// static
@@ -404,6 +432,61 @@ const Interval IndexBoundsBuilder::kNullPointInterval =
const Interval IndexBoundsBuilder::kEmptyArrayPointInterval =
IndexBoundsBuilder::makePointInterval(kEmptyArrayElementObj);
+bool detectIfEntireNullIntervalMatchesPredicate(const InMatchExpression* ime,
+ const IndexEntry& index) {
+ if (!ime->hasNull()) {
+ // This isn't a null query.
+ return false;
+ }
+
+ if (index.sparse || (IndexType::INDEX_HASHED == index.type)) {
+ // Sparse indexes and hashed indexes still require a FETCH stage with a filter for null
+ // queries.
+ return false;
+ }
+
+ // Given the context of having a null $in query with eligible indexes, we may be able to cover
+ // some combinations of intervals that we could not cover individually.
+ if (index.multikey) {
+ // If the path has multiple components and we have a multikey index, we still need a FETCH
+ // in order to defend against cases where we have a multikey index on "a". These documents
+ // will generate null index keys: {"a.b": null} and {a: [1,2,3]}. However, a query like
+ // {"a.b": {$in: [null, []]}} should not match {a: [1, 2, 3]}.
+ // TODO SERVER-71021: it may be possible to cover more cases here.
+ if (ime->fieldRef()->numParts() > 1) {
+ return false;
+ }
+
+        // We must have an equality to an empty array for this null query to be covered; otherwise,
+ // because we generate both null and undefined point intervals for a null query, and because
+ // a multikey index reuses the same entry for [] and undefined, we will not be able to cover
+ // the query.
+ if (!ime->hasEmptyArray()) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void IndexBoundsBuilder::_mergeTightness(const BoundsTightness& tightness,
+ BoundsTightness& tightnessOut) {
+ // There is a special case where we may have a covered null query (EXACT_MAYBE_COVERED) and a
+ // regex with inexact bounds that doesn't need a FETCH (INEXACT_COVERED). In this case, we want
+ // to update the tightness to INEXACT_MAYBE_COVERED, to indicate that we need to check if the
+ // projection allows us to cover the query, but ensure that we will have a filter on the index
+ // if it turns out we can.
+ if (((tightness == BoundsTightness::EXACT_MAYBE_COVERED) &&
+ (tightnessOut == BoundsTightness::INEXACT_COVERED)) ||
+ ((tightness == BoundsTightness::INEXACT_COVERED) &&
+ (tightnessOut == BoundsTightness::EXACT_MAYBE_COVERED))) {
+ tightnessOut = BoundsTightness::INEXACT_MAYBE_COVERED;
+ } else if (tightness < tightnessOut) {
+        // Otherwise, fall back to picking the new tightness if it is looser than the old tightness.
+ tightnessOut = tightness;
+ }
+}
+
void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr,
const BSONElement& elt,
const IndexEntry& index,
@@ -955,51 +1038,45 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr,
});
const InMatchExpression* ime = static_cast<const InMatchExpression*>(expr);
-
*tightnessOut = IndexBoundsBuilder::EXACT;
// Create our various intervals.
IndexBoundsBuilder::BoundsTightness tightness;
- bool arrayOrNullPresent = false;
+ // We check if the $in predicate satisfies conditions to be a covered null predicate on the
+ // basis of indexes, null intervals, and array intervals.
+ const bool entireNullIntervalMatchesPredicate =
+ detectIfEntireNullIntervalMatchesPredicate(ime, index);
for (auto&& equality : ime->getEqualities()) {
- translateEquality(equality, index, isHashed, oilOut, &tightness);
- // The ordering invariant of oil has been violated by the call to translateEquality.
- arrayOrNullPresent = arrayOrNullPresent || equality.type() == BSONType::jstNULL ||
- equality.type() == BSONType::Array;
- if (tightness != IndexBoundsBuilder::EXACT) {
- *tightnessOut = tightness;
+ // First, we generate the bounds the same way that we would do for an individual
+ // equality. This will set tightness to the value it should be if this equality is being
+ // considered in isolation.
+ IndexBoundsBuilder::translateEquality(equality, index, isHashed, oilOut, &tightness);
+ if (entireNullIntervalMatchesPredicate &&
+ (BSONType::jstNULL == equality.type() ||
+ (BSONType::Array == equality.type() && equality.Obj().isEmpty()))) {
+ // We may have a covered null query. In this case, we update both empty array and
+ // null interval tightness to EXACT_MAYBE_COVERED, as individually they would have a
+ // tightness of INEXACT_FETCH. However, we already know we will be able to cover
+ // these intervals together if we have appropriate projections. Note that any other
+ // intervals that cannot be covered may still require the query to use a FETCH.
+ tightness = IndexBoundsBuilder::EXACT_MAYBE_COVERED;
}
+ IndexBoundsBuilder::_mergeTightness(tightness, *tightnessOut);
}
for (auto&& regex : ime->getRegexes()) {
translateRegex(regex.get(), index, oilOut, &tightness);
- if (tightness != IndexBoundsBuilder::EXACT) {
- *tightnessOut = tightness;
- }
- }
-
- if (ime->hasNull()) {
- // A null index key does not always match a null query value so we must fetch the
- // doc and run a full comparison. See SERVER-4529.
- // TODO: Do we already set the tightnessOut by calling translateEquality?
- *tightnessOut = INEXACT_FETCH;
- }
-
- if (ime->hasEmptyArray()) {
- // Empty arrays are indexed as undefined.
- BSONObjBuilder undefinedBob;
- undefinedBob.appendUndefined("");
- oilOut->intervals.push_back(makePointInterval(undefinedBob.obj()));
- *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+ IndexBoundsBuilder::_mergeTightness(tightness, *tightnessOut);
}
// Equalities are already sorted and deduped so unionize is unneccesary if no regexes
// are present. Hashed indexes may also cause the bounds to be out-of-order.
- // Arrays and nulls introduce multiple elements that neccesitate a sort and deduping.
- if (!ime->getRegexes().empty() || index.type == IndexType::INDEX_HASHED ||
- arrayOrNullPresent)
+ // Arrays and nulls introduce multiple elements that necessitate a sort and deduping.
+ if (ime->hasNonScalarOrNonEmptyValues() || index.type == IndexType::INDEX_HASHED) {
unionize(oilOut);
+ }
+
} else if (MatchExpression::GEO == expr->matchType()) {
const GeoMatchExpression* gme = static_cast<const GeoMatchExpression*>(expr);
if ("2dsphere" == elt.valueStringDataSafe()) {
@@ -1315,6 +1392,7 @@ void IndexBoundsBuilder::translateEquality(const BSONElement& data,
}
std::sort(oil->intervals.begin(), oil->intervals.end(), IntervalComparison);
+
*tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
}
diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h
index b48228328f5..d1067caa561 100644
--- a/src/mongo/db/query/index_bounds_builder.h
+++ b/src/mongo/db/query/index_bounds_builder.h
@@ -67,16 +67,28 @@ public:
* increasing tightness. These values are used when we need to do comparison between two
* BoundsTightness values. Such comparisons can answer questions such as "Does predicate
* X have tighter or looser bounds than predicate Y?".
+ *
+ * These enum values are ordered from loosest to tightest.
*/
enum BoundsTightness {
// Index bounds are inexact, and a fetch is required.
INEXACT_FETCH = 0,
- // Index bounds are inexact, but no fetch is required
- INEXACT_COVERED = 1,
+ // Index bounds are inexact, and a fetch may be required depending on the projection.
+ // For example, a count $in query on null + a regex can be covered, but a find query with
+ // the same filter and no projection cannot.
+ INEXACT_MAYBE_COVERED = 1,
+
+ // Index bounds are exact, but a fetch may be required depending on the projection.
+ // For example, a find query on null may be covered, depending on which fields we project
+ // out.
+ EXACT_MAYBE_COVERED = 2,
+
+ // Index bounds are inexact, but no fetch is required.
+ INEXACT_COVERED = 3,
// Index bounds are exact.
- EXACT = 2
+ EXACT = 4
};
/**
@@ -301,6 +313,11 @@ private:
OrderedIntervalList* oilOut,
BoundsTightness* tightnessOut,
interval_evaluation_tree::Builder* ietBuilder);
+
+ /**
+ * Helper method for merging interval tightness for $in expressions.
+ */
+ static void _mergeTightness(const BoundsTightness& tightness, BoundsTightness& tightnessOut);
};
} // namespace mongo
diff --git a/src/mongo/db/query/index_bounds_builder_eq_null_test.cpp b/src/mongo/db/query/index_bounds_builder_eq_null_test.cpp
index af4cdf91303..31e17b6e2c8 100644
--- a/src/mongo/db/query/index_bounds_builder_eq_null_test.cpp
+++ b/src/mongo/db/query/index_bounds_builder_eq_null_test.cpp
@@ -48,7 +48,7 @@ void assertBoundsRepresentEqualsNull(const OrderedIntervalList& oil) {
oil.intervals[1].compare(Interval(fromjson("{'': null, '': null}"), true, true)));
}
-TEST_F(IndexBoundsBuilderTest, TranslateExprEqualToNullIsInexactFetch) {
+TEST_F(IndexBoundsBuilderTest, TranslateExprEqualToNullIsExactMaybeCovered) {
BSONObj keyPattern = BSON("a" << 1);
BSONElement elt = keyPattern.firstElement();
auto testIndex = buildSimpleIndexEntry(keyPattern);
@@ -65,11 +65,11 @@ TEST_F(IndexBoundsBuilderTest, TranslateExprEqualToNullIsInexactFetch) {
oil.intervals[0].compare(Interval(fromjson("{'': undefined, '': undefined}"), true, true)));
ASSERT_EQUALS(Interval::INTERVAL_EQUALS,
oil.intervals[1].compare(Interval(fromjson("{'': null, '': null}"), true, true)));
- ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH);
+ ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT_MAYBE_COVERED);
assertIET(inputParamIdMap, ietBuilder, elt, testIndex, oil);
}
-TEST_F(IndexBoundsBuilderTest, TranslateEqualsToNullShouldBuildInexactBounds) {
+TEST_F(IndexBoundsBuilderTest, TranslateEqualsToNullShouldBuildExactMaybeCoveredBounds) {
BSONObj indexPattern = BSON("a" << 1);
auto testIndex = buildSimpleIndexEntry(indexPattern);
@@ -83,12 +83,12 @@ TEST_F(IndexBoundsBuilderTest, TranslateEqualsToNullShouldBuildInexactBounds) {
expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness, &ietBuilder);
ASSERT_EQUALS(oil.name, "a");
- ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH);
+ ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT_MAYBE_COVERED);
assertBoundsRepresentEqualsNull(oil);
assertIET(inputParamIdMap, ietBuilder, indexPattern.firstElement(), testIndex, oil);
}
-TEST_F(IndexBoundsBuilderTest, TranslateDottedEqualsToNullShouldBuildInexactBounds) {
+TEST_F(IndexBoundsBuilderTest, TranslateDottedEqualsToNullShouldBuildExactMaybeCoveredBounds) {
BSONObj indexPattern = BSON("a.b" << 1);
auto testIndex = buildSimpleIndexEntry(indexPattern);
@@ -102,7 +102,9 @@ TEST_F(IndexBoundsBuilderTest, TranslateDottedEqualsToNullShouldBuildInexactBoun
expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness, &ietBuilder);
ASSERT_EQUALS(oil.name, "a.b");
- ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH);
+ // Depending on the query projection, this will either be converted to EXACT or to INEXACT_FETCH
+ // before we build an IXSCAN plan.
+ ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT_MAYBE_COVERED);
assertBoundsRepresentEqualsNull(oil);
assertIET(inputParamIdMap, ietBuilder, indexPattern.firstElement(), testIndex, oil);
}
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index e05dfd8a5d1..dd75cbce48d 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -1114,47 +1114,13 @@ std::vector<std::unique_ptr<QuerySolutionNode>> QueryPlannerAccess::collapseEqui
}
/**
- * Returns true if this is a null query that can retrieve all the information it needs directly from
- * the index, and so does not need a FETCH stage on top of it. Returns false otherwise.
+ * This helper determines if a query can be covered depending on the query projection.
*/
-bool isCoveredNullQuery(const CanonicalQuery& query,
- MatchExpression* root,
- IndexTag* tag,
- const vector<IndexEntry>& indices,
- const QueryPlannerParams& params) {
- // Sparse indexes and hashed indexes should not use this optimization as they will require a
- // FETCH stage with a filter.
- if (indices[tag->index].sparse || indices[tag->index].type == IndexType::INDEX_HASHED) {
- return false;
- }
-
- // When the index is not multikey, we can support a query on an indexed field searching for null
- // values. This optimization can only be done when the index is not multikey, otherwise empty
- // arrays in the collection will be treated as null/undefined by the index. When the index is
- // multikey, we can support a query searching for both null and empty array values.
- const auto multikeyIndex = indices[tag->index].multikey;
- if (root->matchType() == MatchExpression::MatchType::MATCH_IN) {
- // Check that the query matches null values, if the index is not multikey, or null and empty
- // array values, if the index is multikey. Note that the query may match values other than
- // null (and empty array).
- const auto node = static_cast<const InMatchExpression*>(root);
- if (!node->hasNull() || (multikeyIndex && !node->hasEmptyArray())) {
- return false;
- }
- } else if (ComparisonMatchExpressionBase::isEquality(root->matchType()) && !multikeyIndex) {
- // Check that the query matches null values.
- const auto node = static_cast<const ComparisonMatchExpressionBase*>(root);
- if (node->getData().type() != BSONType::jstNULL) {
- return false;
- }
- } else {
- return false;
- }
-
+bool projNeedsFetch(const CanonicalQuery& query, const QueryPlannerParams& params) {
// If nothing is being projected, the query is fully covered without a fetch.
// This is trivially true for a count query.
if (params.options & QueryPlannerParams::Options::IS_COUNT) {
- return true;
+ return false;
}
// This optimization can only be used for find when the index covers the projection completely.
@@ -1163,7 +1129,7 @@ bool isCoveredNullQuery(const CanonicalQuery& query,
// in the multikey case). Hence, only find queries projecting _id are covered.
auto proj = query.getProj();
if (!proj) {
- return false;
+ return true;
}
// We can cover projections on _id and generated fields and expressions depending only on _id.
@@ -1175,10 +1141,38 @@ bool isCoveredNullQuery(const CanonicalQuery& query,
// Note that it is not possible to project onto dotted paths of _id here, since they may be
// null or missing, and the index cannot differentiate between the two cases, so we would
// still need a FETCH stage.
- return projFields.size() == 1 && *projFields.begin() == "_id";
+ if (projFields.size() == 1 && *projFields.begin() == "_id") {
+ return false;
+ }
}
- return false;
+ return true;
+}
+
+/**
+ * This helper updates a MAYBE_COVERED query tightness to one of EXACT, INEXACT_COVERED, or
+ * INEXACT_FETCH, depending on whether we need a FETCH/filter to answer the query projection.
+ */
+void refineTightnessForMaybeCoveredQuery(const CanonicalQuery& query,
+ const QueryPlannerParams& params,
+ IndexBoundsBuilder::BoundsTightness& tightnessOut) {
+ // We need to refine the tightness in case we have a "MAYBE_COVERED" tightness bound which
+ // depends on the query's projection. We will not have information about the projection
+ // later on in order to make this determination, so we do it here.
+ const bool noFetchNeededForProj = !projNeedsFetch(query, params);
+ if (tightnessOut == IndexBoundsBuilder::EXACT_MAYBE_COVERED) {
+ if (noFetchNeededForProj) {
+ tightnessOut = IndexBoundsBuilder::EXACT;
+ } else {
+ tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+ }
+ } else if (tightnessOut == IndexBoundsBuilder::INEXACT_MAYBE_COVERED) {
+ if (noFetchNeededForProj) {
+ tightnessOut = IndexBoundsBuilder::INEXACT_COVERED;
+ } else {
+ tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
+ }
+ }
}
bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
@@ -1222,11 +1216,6 @@ bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
// If we're here, we now know that 'child' can use an index directly and the index is
// over the child's field.
- // We need to track if this is a covered null query so that we can have this information
- // at hand when handling the filter on an indexed AND.
- scanState.isCoveredNullQuery =
- isCoveredNullQuery(query, child, scanState.ixtag, indices, params);
-
// If 'child' is a NOT, then the tag we're interested in is on the NOT's
// child node.
if (MatchExpression::NOT == child->matchType()) {
@@ -1259,6 +1248,7 @@ bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
verify(scanState.currentIndexNumber == scanState.ixtag->index);
scanState.tightness = IndexBoundsBuilder::INEXACT_FETCH;
mergeWithLeafNode(child, &scanState);
+ refineTightnessForMaybeCoveredQuery(query, params, scanState.tightness);
handleFilter(&scanState);
} else {
if (nullptr != scanState.currentScan.get()) {
@@ -1278,6 +1268,7 @@ bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query,
&scanState.tightness,
scanState.getCurrentIETBuilder());
+ refineTightnessForMaybeCoveredQuery(query, params, scanState.tightness);
handleFilter(&scanState);
}
}
@@ -1693,6 +1684,12 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::_buildIndexedDataAccess(
return soln;
}
+ // We may be able to avoid adding an extra fetch stage even though the bounds are
+ // inexact, for instance if the query is counting null values on an indexed field
+ // without projecting that field. We therefore convert "MAYBE_COVERED" bounds into
+ // either EXACT or INEXACT, depending on the query projection.
+ refineTightnessForMaybeCoveredQuery(query, params, tightness);
+
// If the bounds are exact, the set of documents that satisfy the predicate is
// exactly equal to the set of documents that the scan provides.
//
@@ -1700,11 +1697,7 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::_buildIndexedDataAccess(
// superset of documents that satisfy the predicate, and we must check the
// predicate.
- // We may also be able to avoid adding an extra fetch stage even though the bounds are
- // inexact because the query is counting null values on an indexed field without
- // projecting that field.
- if (tightness == IndexBoundsBuilder::EXACT ||
- isCoveredNullQuery(query, root, tag, indices, params)) {
+ if (tightness == IndexBoundsBuilder::EXACT) {
return soln;
} else if (tightness == IndexBoundsBuilder::INEXACT_COVERED &&
!indices[tag->index].multikey) {
@@ -1850,10 +1843,9 @@ void QueryPlannerAccess::handleFilterAnd(ScanBuildingState* scanState) {
// should always be affixed as a filter. We keep 'curChild' in the $and
// for affixing later.
++scanState->curChild;
- } else if (scanState->tightness == IndexBoundsBuilder::EXACT || scanState->isCoveredNullQuery) {
- // The tightness of the bounds is exact or we are dealing with a covered null query.
- // Either way, we want to remove this child so that when control returns to handleIndexedAnd
- // we know that we don't need it to create a FETCH stage.
+ } else if (scanState->tightness == IndexBoundsBuilder::EXACT) {
+ // The tightness of the bounds is exact. We want to remove this child so that when control
+ // returns to handleIndexedAnd we know that we don't need it to create a FETCH stage.
root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild);
} else if (scanState->tightness == IndexBoundsBuilder::INEXACT_COVERED &&
(INDEX_TEXT == index.type || !index.multikey)) {
diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h
index 6ea44830415..bd9c856ffab 100644
--- a/src/mongo/db/query/planner_access.h
+++ b/src/mongo/db/query/planner_access.h
@@ -145,11 +145,9 @@ private:
struct ScanBuildingState {
ScanBuildingState(MatchExpression* theRoot,
const std::vector<IndexEntry>& indexList,
- bool inArrayOp,
- bool isCoveredNull = false)
+ bool inArrayOp)
: root(theRoot),
inArrayOperator(inArrayOp),
- isCoveredNullQuery(isCoveredNull),
indices(indexList),
currentScan(nullptr),
curChild(0),
@@ -188,9 +186,6 @@ private:
// Are we inside an array operator such as $elemMatch or $all?
bool inArrayOperator;
- // Is this a covered null query?
- bool isCoveredNullQuery;
-
// A list of relevant indices which 'root' may be tagged to use.
const std::vector<IndexEntry>& indices;