diff options
author | Alya Berciu <alya.berciu@mongodb.com> | 2022-11-08 16:37:07 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-11-08 17:13:58 +0000 |
commit | 7e3381f8a26e6bb01d4715961f6babadff651de0 (patch) | |
tree | f19bceb6c5a3f711e349c7ab50ea4f5f4e5dc2f9 | |
parent | d8329fbe00da54b5f8b0f60889364e7920968756 (diff) | |
download | mongo-7e3381f8a26e6bb01d4715961f6babadff651de0.tar.gz |
SERVER-70436 Handle covered $or null queries with regex
-rw-r--r-- | etc/backports_required_for_multiversion_tests.yml | 4 | ||||
-rw-r--r-- | jstests/core/cover_null_queries.js | 322 | ||||
-rw-r--r-- | jstests/core/null_query_semantics.js | 164 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_leaf.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_leaf.h | 11 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.cpp | 144 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.h | 23 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder_eq_null_test.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/query/planner_access.cpp | 100 | ||||
-rw-r--r-- | src/mongo/db/query/planner_access.h | 7 |
10 files changed, 644 insertions, 148 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index 2bcfc26a6e9..6b118574a7f 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -220,6 +220,8 @@ last-continuous: ticket: SERVER-68728 - test_file: jstests/aggregation/expressions/switch_errors.js ticket: SERVER-70190 + - test_file: jstests/core/cover_null_queries.js + ticket: SERVER-70436 suites: null last-lts: all: @@ -517,4 +519,6 @@ last-lts: ticket: SERVER-68728 - test_file: jstests/aggregation/expressions/switch_errors.js ticket: SERVER-70190 + - test_file: jstests/core/cover_null_queries.js + ticket: SERVER-70436 suites: null diff --git a/jstests/core/cover_null_queries.js b/jstests/core/cover_null_queries.js index 50bf0b58b07..a05cb8ec3f8 100644 --- a/jstests/core/cover_null_queries.js +++ b/jstests/core/cover_null_queries.js @@ -3,7 +3,9 @@ * @tags: [ * assumes_unsharded_collection, * requires_non_retryable_writes, - * requires_fcv_52 + * requires_fcv_60, + * # This test could produce unexpected explain output if additional indexes are created. + * assumes_no_implicit_index_creation, * ] */ (function() { @@ -57,12 +59,22 @@ function validateStages({cmdObj, expectedStages, isAgg}) { */ function validateFindCmdOutputAndPlan({filter, projection, expectedStages, expectedOutput}) { const cmdObj = {find: coll.getName(), filter: filter, projection: projection}; + + // Compare index output with expected output. if (expectedOutput) { const res = assert.commandWorked(coll.runCommand(cmdObj)); const ouputArray = new DBCommandCursor(coll.getDB(), res).toArray(); assert(arrayEq(expectedOutput, ouputArray), ouputArray); } + + // Validate explain. validateStages({cmdObj, expectedStages}); + + // Verify that we get the same output as we expect without an index. + const noIndexCmdObj = Object.assign(cmdObj, {hint: {$natural: 1}}); + const resNoIndex = assert.commandWorked(coll.runCommand(noIndexCmdObj)); + const noIndexOutArr = new DBCommandCursor(coll.getDB(), resNoIndex).toArray(); + assert(arrayEq(expectedOutput, noIndexOutArr), noIndexOutArr); } /** @@ -71,10 +83,18 @@ function validateFindCmdOutputAndPlan({filter, projection, expectedStages, expec * are present in the plan returned. */ function validateSimpleCountCmdOutputAndPlan({filter, expectedStages, expectedCount}) { + // Compare index output with expected output. const cmdObj = {count: coll.getName(), query: filter}; const res = assert.commandWorked(coll.runCommand(cmdObj)); assert.eq(res.n, expectedCount); + + // Validate explain. validateStages({cmdObj, expectedStages}); + + // Verify that we get the same output with and without an index. + const noIndexCmdObj = Object.assign(cmdObj, {hint: {$natural: 1}}); + const resNoIndex = assert.commandWorked(coll.runCommand(noIndexCmdObj)); + assert.eq(resNoIndex.n, expectedCount); } /** @@ -88,11 +108,33 @@ function validateCountAggCmdOutputAndPlan({filter, expectedStages, expectedCount pipeline: pipeline || [{$match: filter}, {$count: "count"}], cursor: {}, }; + + // Compare index output with expected output. const cmdRes = assert.commandWorked(coll.runCommand(cmdObj)); const countRes = cmdRes.cursor.firstBatch; assert.eq(countRes.length, 1, cmdRes); assert.eq(countRes[0].count, expectedCount, countRes); + + // Validate explain. validateStages({cmdObj, expectedStages, isAgg: true}); + + // Verify that we get the same output as we expect without an index. + const noIndexCmdObj = Object.assign(cmdObj, {hint: {$natural: 1}}); + const resNoIndex = assert.commandWorked(coll.runCommand(noIndexCmdObj)); + const countResNoIndex = resNoIndex.cursor.firstBatch; + assert.eq(countResNoIndex.length, 1, cmdRes); + assert.eq(countResNoIndex[0].count, expectedCount, countRes); +} + +/** + * Same as above, but uses a $group count. + */ +function validateGroupCountAggCmdOutputAndPlan({filter, expectedStages, expectedCount}) { + validateCountAggCmdOutputAndPlan({ + expectedStages, + expectedCount, + pipeline: [{$match: filter}, {$group: {_id: 0, count: {$count: {}}}}] + }); } assert.commandWorked(coll.createIndex({a: 1, _id: 1})); @@ -152,7 +194,7 @@ validateFindCmdOutputAndPlan({ expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); -// Same as above, but special case for null and empty array predicate. +// We can cover a $in with null and an empty array predicate. validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, []]}}, projection: {_id: 1}, @@ -165,6 +207,21 @@ validateSimpleCountCmdOutputAndPlan({ expectedStages: {"IXSCAN": 1, "FETCH": 0}, }); +// We cannot cover a $in with null and an array predicate. +// TODO SERVER-71058: It should be possible to cover this case and the more general case of matching +// an array on a non-multikey index. +validateFindCmdOutputAndPlan({ + filter: {a: {$in: [null, ["a"]]}}, + projection: {_id: 1}, + expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}], + expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, +}); +validateSimpleCountCmdOutputAndPlan({ + filter: {a: {$in: [null, ["a"]]}}, + expectedCount: 4, + expectedStages: {"IXSCAN": 1, "FETCH": 1, "COUNT": 1}, +}); + // Verify that a more complex projection that only relies on the _id field does not need a FETCH. validateFindCmdOutputAndPlan({ filter: {a: null}, @@ -691,4 +748,265 @@ validateCountAggCmdOutputAndPlan({ expectedCount: 4, expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0}, }); + +// Validate that we can use the optimization when we have regex without array elements in a $in or +// $or. See SERVER-70436 for more details. +coll.drop(); + +assert.commandWorked(coll.insertMany([ + {_id: 1, a: '123456'}, + {_id: 2, a: '1234567'}, + {_id: 3, a: ' 12345678'}, + {_id: 4, a: '444456'}, + {_id: 5, a: ''}, + {_id: 6, a: null}, + {_id: 7}, +])); + +assert.commandWorked(coll.createIndex({a: 1, _id: 1})); + +// TODO SERVER-70998: Can apply optimization in case without regex; however, we still can't use a +// COUNT_SCAN in this case. +validateFindCmdOutputAndPlan({ + filter: {$or: [{a: null}, {a: ""}]}, + projection: {_id: 1}, + expectedOutput: [{_id: 5}, {_id: 6}, {_id: 7}], + expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, +}); +validateSimpleCountCmdOutputAndPlan({ + filter: {$or: [{a: null}, {a: ""}]}, + expectedCount: 3, + expectedStages: {"COUNT": 1, "IXSCAN": 1, "FETCH": 0}, +}); +validateGroupCountAggCmdOutputAndPlan({ + filter: {$or: [{a: null}, {a: ""}]}, + expectedCount: 3, + expectedStages: {"IXSCAN": 1, "FETCH": 0}, +}); + +// Can still apply optimization when we have regex. +validateFindCmdOutputAndPlan({ + filter: {$or: [{a: null}, {a: {$regex: "^$"}}]}, + projection: {_id: 1}, + expectedOutput: [{_id: 5}, {_id: 6}, {_id: 7}], + expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, +}); +validateSimpleCountCmdOutputAndPlan({ + filter: {$or: [{a: null}, {a: {$regex: "^$"}}]}, + expectedCount: 3, + expectedStages: {"IXSCAN": 1, "FETCH": 0, "COUNT": 1}, +}); +validateGroupCountAggCmdOutputAndPlan({ + filter: {$or: [{a: null}, {a: {$regex: "^$"}}]}, + expectedCount: 3, + expectedStages: {"IXSCAN": 1, "FETCH": 0}, +}); + +// Now test case with a multikey index. We can't leverage the optimization here. +assert.commandWorked(coll.insert({_id: 8, a: [1, 2, 3]})); +assert.commandWorked(coll.insert({_id: 9, a: []})); + +validateFindCmdOutputAndPlan({ + filter: {$or: [{a: null}, {a: []}, {a: {$regex: "^$"}}]}, + projection: {_id: 1}, + expectedOutput: [{_id: 5}, {_id: 6}, {_id: 7}, {_id: 9}], + expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, +}); +validateSimpleCountCmdOutputAndPlan({ + filter: {$or: [{a: null}, {a: []}, {a: {$regex: "^$"}}]}, + expectedCount: 4, + expectedStages: {"COUNT": 1, "IXSCAN": 1, "FETCH": 1}, +}); +validateGroupCountAggCmdOutputAndPlan({ + filter: {$or: [{a: null}, {a: []}, {a: {$regex: "^$"}}]}, + expectedCount: 4, + expectedStages: {"IXSCAN": 1, "FETCH": 1}, +}); + +// We also shouldn't cover queries on multikey indexes where $in includes an array, as we will still +// need a filter after the IXSCAN to correctly return +validateFindCmdOutputAndPlan({ + filter: {$or: [{a: null}, {a: []}, {a: [2]}]}, + projection: {_id: 1}, + expectedOutput: [{_id: 6}, {_id: 7}, {_id: 9}], + expectedStages: {"IXSCAN": 1, "FETCH": 1}, +}); +validateSimpleCountCmdOutputAndPlan({ + filter: {$or: [{a: null}, {a: []}, {a: [2]}]}, + expectedCount: 3, + expectedStages: {"IXSCAN": 1, "FETCH": 1}, +}); +validateGroupCountAggCmdOutputAndPlan({ + filter: {$or: [{a: null}, {a: []}, {a: [2]}]}, + expectedCount: 3, + expectedStages: {"IXSCAN": 1, "FETCH": 1}, +}); + +// Validate that when we have a dotted path, we return the correct results for null queries. +coll.drop(); +assert.commandWorked(coll.insertMany([ + {_id: 1, a: 1}, + {_id: 2, a: null}, + {_id: 3}, + {_id: 4, a: {b: 1}}, + {_id: 5, a: {b: null}}, + {_id: 6, a: {c: 1}}, +])); +assert.commandWorked(coll.createIndex({"a.b": 1, _id: 1})); + +validateFindCmdOutputAndPlan({ + filter: {"a.b": null}, + projection: {_id: 1}, + expectedOutput: [{_id: 1}, {_id: 2}, {_id: 3}, {_id: 5}, {_id: 6}], + expectedStages: {"IXSCAN": 1, "PROJECTION_COVERED": 1, "FETCH": 0}, +}); +validateSimpleCountCmdOutputAndPlan({ + filter: {"a.b": null}, + expectedCount: 5, + expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0}, +}); +validateGroupCountAggCmdOutputAndPlan({ + filter: {"a.b": null}, + expectedCount: 5, + expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0}, +}); + +validateFindCmdOutputAndPlan({ + filter: {a: {b: null}}, + projection: {_id: 1}, + expectedOutput: [{_id: 5}], + expectedStages: {"COLLSCAN": 1, "PROJECTION_SIMPLE": 1}, +}); +validateSimpleCountCmdOutputAndPlan({ + filter: {a: {b: null}}, + expectedCount: 1, + expectedStages: {"COLLSCAN": 1}, +}); +validateGroupCountAggCmdOutputAndPlan({ + filter: {a: {b: null}}, + expectedCount: 1, + expectedStages: {"COLLSCAN": 1}, +}); + +// Still need fetch if we don't have a sufficiently restrictive projection. +validateFindCmdOutputAndPlan({ + filter: {"a.b": null}, + projection: {_id: 1, a: 1}, + expectedOutput: [ + {_id: 1, a: 1}, + {_id: 2, a: null}, + {_id: 3}, + {_id: 5, a: {b: null}}, + {_id: 6, a: {c: 1}}, + ], + expectedStages: {"IXSCAN": 1, "FETCH": 1}, +}); + +// Make index multikey, and test case where field b is nested in an array. +assert.commandWorked(coll.insertMany([ + {_id: 7, a: [{b: null}]}, + {_id: 8, a: [{b: []}]}, + {_id: 9, a: [{b: [1, 2, 3]}]}, + {_id: 10, a: [{b: 123}]}, + {_id: 11, a: [{c: 123}]}, + {_id: 12, a: []}, + {_id: 13, a: [{}]}, + {_id: 14, a: [1, 2, 3]}, + {_id: 15, a: [{b: 1}, {c: 2}, {b: 3}]}, + {_id: 16, a: [null]}, +])); + +validateFindCmdOutputAndPlan({ + filter: {"a.b": null}, + projection: {_id: 1}, + expectedOutput: [ + {_id: 1}, + {_id: 2}, + {_id: 3}, + {_id: 5}, + {_id: 6}, + {_id: 7}, + {_id: 11}, + {_id: 13}, + {_id: 15} + ], + expectedStages: {"IXSCAN": 1, "PROJECTION_SIMPLE": 1, "FETCH": 1}, +}); +validateSimpleCountCmdOutputAndPlan({ + filter: {"a.b": null}, + expectedCount: 9, + expectedStages: {"COUNT": 1, "IXSCAN": 1, "FETCH": 1}, +}); +validateGroupCountAggCmdOutputAndPlan({ + filter: {"a.b": null}, + expectedCount: 9, + expectedStages: {"IXSCAN": 1, "FETCH": 1}, +}); + +validateFindCmdOutputAndPlan({ + filter: {a: {b: null}}, + projection: {_id: 1}, + expectedOutput: [{_id: 5}, {_id: 7}], + expectedStages: { + "COLLSCAN": 1, + "PROJECTION_SIMPLE": 1, + }, +}); +validateSimpleCountCmdOutputAndPlan({ + filter: {a: {b: null}}, + expectedCount: 2, + expectedStages: {"COLLSCAN": 1}, +}); +validateGroupCountAggCmdOutputAndPlan({ + filter: {a: {b: null}}, + expectedCount: 2, + expectedStages: {"COLLSCAN": 1}, +}); + +validateFindCmdOutputAndPlan({ + filter: {a: [{b: null}]}, + projection: {_id: 1}, + expectedOutput: [{_id: 7}], + expectedStages: {"COLLSCAN": 1, "PROJECTION_SIMPLE": 1}, +}); +validateSimpleCountCmdOutputAndPlan({ + filter: {a: [{b: null}]}, + expectedCount: 1, + expectedStages: {"COLLSCAN": 1}, +}); +validateGroupCountAggCmdOutputAndPlan({ + filter: {a: [{b: null}]}, + expectedCount: 1, + expectedStages: {"COLLSCAN": 1}, +}); + +// We still need a FETCH for composite paths, because both {a: [1,2,3]} and {"a.b": null} generate +// null index keys, but the former should not match the predicate below. +validateFindCmdOutputAndPlan({ + filter: {"a.b": {$in: [null, []]}}, + projection: {_id: 1}, + expectedOutput: [ + {_id: 1}, + {_id: 2}, + {_id: 3}, + {_id: 5}, + {_id: 6}, + {_id: 7}, + {_id: 8}, + {_id: 11}, + {_id: 13}, + {_id: 15} + ], + expectedStages: {"IXSCAN": 1, "PROJECTION_SIMPLE": 1, "FETCH": 1}, +}); +validateSimpleCountCmdOutputAndPlan({ + filter: {"a.b": {$in: [null, []]}}, + expectedCount: 10, + expectedStages: {"IXSCAN": 1, "FETCH": 1}, +}); +validateGroupCountAggCmdOutputAndPlan({ + filter: {"a.b": {$in: [null, []]}}, + expectedCount: 10, + expectedStages: {"IXSCAN": 1, "FETCH": 1}, +}); })(); diff --git a/jstests/core/null_query_semantics.js b/jstests/core/null_query_semantics.js index 1ff14cc710c..aa9042c5ac5 100644 --- a/jstests/core/null_query_semantics.js +++ b/jstests/core/null_query_semantics.js @@ -41,6 +41,9 @@ function testNullSemantics(coll) { [{_id: "a_null", a: null}, {_id: "a_undefined", a: undefined}, {_id: "no_a"}]; assert(resultsEq(expected, noProjectResults), tojson(noProjectResults)); + const count = coll.count({a: {$eq: null}}); + assert.eq(count, expected.length); + const projectResults = coll.find({a: {$eq: null}}, projectToOnlyA).toArray(); assert(resultsEq(projectResults, extractAValues(expected)), tojson(projectResults)); }()); @@ -57,6 +60,9 @@ function testNullSemantics(coll) { ]; assert(resultsEq(noProjectResults, expected), tojson(noProjectResults)); + const count = coll.count({a: {$ne: null}}); + assert.eq(count, expected.length); + const projectResults = coll.find({a: {$ne: null}}, projectToOnlyA).toArray(); assert(resultsEq(projectResults, extractAValues(expected)), tojson(projectResults)); }()); @@ -72,6 +78,9 @@ function testNullSemantics(coll) { {_id: "no_a"}, ]; + const count = coll.count(query); + assert.eq(count, expected.length); + assert(resultsEq(noProjectResults, expected), noProjectResults); const projectResults = coll.find(query, projectToOnlyA).toArray(); @@ -89,6 +98,9 @@ function testNullSemantics(coll) { {_id: "no_a"}, ]; + const count = coll.count(query); + assert.eq(count, expected.length); + assert(resultsEq(noProjectResults, expected), noProjectResults); const projectResults = coll.find(query, projectToOnlyA).toArray(); @@ -106,6 +118,9 @@ function testNullSemantics(coll) { {_id: "a_subobject_b_undefined", a: {b: undefined}}, ]; + const count = coll.count(query); + assert.eq(count, expected.length); + assert(resultsEq(noProjectResults, expected), noProjectResults); const projectResults = coll.find(query, projectToOnlyA).toArray(); @@ -126,6 +141,9 @@ function testNullSemantics(coll) { ]; assert(resultsEq(noProjectResults, expected), tojson(noProjectResults)); + const count = coll.count(query); + assert.eq(count, expected.length); + const projectResults = coll.find(query, projectToOnlyA).toArray(); assert(resultsEq(projectResults, extractAValues(expected)), projectResults); }()); @@ -137,6 +155,9 @@ function testNullSemantics(coll) { ]; assert(resultsEq(noProjectResults, expected), tojson(noProjectResults)); + const count = coll.count({a: {$exists: false}}); + assert.eq(count, expected.length); + const projectResults = coll.find({a: {$exists: false}}, projectToOnlyA).toArray(); assert(resultsEq(projectResults, extractAValues(expected)), tojson(projectResults)); }()); @@ -144,17 +165,19 @@ function testNullSemantics(coll) { // Test the semantics of the query {"a.b": {$eq: null}}. (function testDottedEqualsNull() { const noProjectResults = coll.find({"a.b": {$eq: null}}).toArray(); - assert(resultsEq(noProjectResults, - [ - {_id: "a_empty_subobject", a: {}}, - {_id: "a_null", a: null}, - {_id: "a_number", a: 4}, - {_id: "a_subobject_b_null", a: {b: null}}, - {_id: "a_subobject_b_undefined", a: {b: undefined}}, - {_id: "a_undefined", a: undefined}, - {_id: "no_a"} - ]), - tojson(noProjectResults)); + const expected = [ + {_id: "a_empty_subobject", a: {}}, + {_id: "a_null", a: null}, + {_id: "a_number", a: 4}, + {_id: "a_subobject_b_null", a: {b: null}}, + {_id: "a_subobject_b_undefined", a: {b: undefined}}, + {_id: "a_undefined", a: undefined}, + {_id: "no_a"} + ]; + assert(resultsEq(noProjectResults, expected), tojson(noProjectResults)); + + const count = coll.count({"a.b": {$eq: null}}); + assert.eq(count, expected.length); const projectResults = coll.find({"a.b": {$eq: null}}, projectToOnlyADotB).toArray(); assert(resultsEq(projectResults, @@ -165,8 +188,11 @@ function testNullSemantics(coll) { // Test the semantics of the query {"a.b": {$ne: null}}. (function testDottedNotEqualsNull() { const noProjectResults = coll.find({"a.b": {$ne: null}}).toArray(); - assert(resultsEq(noProjectResults, [{_id: "a_subobject_b_not_null", a: {b: "hi"}}]), - tojson(noProjectResults)); + const expected = [{_id: "a_subobject_b_not_null", a: {b: "hi"}}]; + assert(resultsEq(noProjectResults, expected), tojson(noProjectResults)); + + const count = coll.count({"a.b": {$ne: null}}); + assert.eq(count, expected.length); const projectResults = coll.find({"a.b": {$ne: null}}, projectToOnlyADotB).toArray(); assert(resultsEq(projectResults, [{a: {b: "hi"}}]), tojson(projectResults)); @@ -183,6 +209,9 @@ function testNullSemantics(coll) { ]; assert(resultsEq(noProjectResults, expected), tojson(noProjectResults)); + const count = coll.count({"a.b": {$exists: false}}); + assert.eq(count, expected.length); + const projectResults = coll.find({"a.b": {$exists: false}}, projectToOnlyADotB).toArray(); assert(resultsEq(projectResults, [{}, {a: {}}, {}, {}, {}]), tojson(projectResults)); }()); @@ -202,6 +231,9 @@ function testNullSemantics(coll) { `Expected no results for query ${tojson(elemMatchQuery)}, got ` + tojson(noProjectResults)); + const count = coll.count(elemMatchQuery); + assert.eq(count, 0); + let projectResults = coll.find(elemMatchQuery, projectToOnlyA).toArray(); assert(resultsEq(projectResults, []), `Expected no results for query ${tojson(elemMatchQuery)}, got ` + @@ -250,6 +282,9 @@ function testNullSemantics(coll) { ]; assert(resultsEq(noProjectResults, expected), tojson(noProjectResults)); + const count = coll.count({a: {$eq: null}}); + assert.eq(count, expected.length); + const projectResults = coll.find({a: {$eq: null}}, projectToOnlyA).toArray(); assert(resultsEq(projectResults, extractAValues(expected)), tojson(projectResults)); }()); @@ -274,6 +309,9 @@ function testNullSemantics(coll) { ]; assert(resultsEq(noProjectResults, expected), tojson(noProjectResults)); + const count = coll.count({a: {$ne: null}}); + assert.eq(count, expected.length); + const projectResults = coll.find({a: {$ne: null}}, projectToOnlyA).toArray(); assert(resultsEq(projectResults, extractAValues(expected)), tojson(projectResults)); }()); @@ -297,6 +335,8 @@ function testNullSemantics(coll) { {_id: "a_value_array_no_nulls", a: [1, "string", 4]}, ]; assert(resultsEq(noProjectResults, expected), tojson(noProjectResults)); + const count = coll.count({a: {$not: {$gte: null}}}); + assert.eq(count, expected.length); }()); // Test the semantics of the query {a: {$in: [null, <number>]}}. @@ -314,6 +354,9 @@ function testNullSemantics(coll) { assert(resultsEq(noProjectResults, expected), noProjectResults); + const count = coll.count({a: {$in: [null, 75]}}); + assert.eq(count, expected.length); + const projectResults = coll.find(query, projectToOnlyA).toArray(); assert(resultsEq(projectResults, extractAValues(expected)), projectResults); }()); @@ -333,6 +376,9 @@ function testNullSemantics(coll) { assert(resultsEq(noProjectResults, expected), noProjectResults); + const count = coll.count({$or: [{a: null}, {a: 75}]}); + assert.eq(count, expected.length); + const projectResults = coll.find(query, projectToOnlyA).toArray(); assert(resultsEq(projectResults, extractAValues(expected)), projectResults); }()); @@ -360,6 +406,9 @@ function testNullSemantics(coll) { assert(resultsEq(noProjectResults, expected), noProjectResults); + const count = coll.count({a: {$nin: [null, 75]}}); + assert.eq(count, expected.length); + const projectResults = coll.find(query, projectToOnlyA).toArray(); assert(resultsEq(projectResults, extractAValues(expected)), projectResults); }()); @@ -395,6 +444,9 @@ function testNullSemantics(coll) { assert(resultsEq(noProjectResults, expected), noProjectResults); + const count = coll.count({a: {$nin: [null, []]}}); + assert.eq(count, expected.length); + const projectResults = coll.find(query, projectToOnlyA).toArray(); assert(resultsEq(projectResults, extractAValues(expected)), projectResults); }()); @@ -420,6 +472,9 @@ function testNullSemantics(coll) { assert(resultsEq(noProjectResults, expected), noProjectResults); + const count = coll.count(query); + assert.eq(count, expected.length); + const projectResults = coll.find(query, projectToOnlyA).toArray(); assert(resultsEq(projectResults, extractAValues(expected)), projectResults); }()); @@ -435,6 +490,9 @@ function testNullSemantics(coll) { ]; assert(resultsEq(noProjectResults, expectedEqualToNull), tojson(noProjectResults)); + const count = coll.count({a: {$elemMatch: {$eq: null}}}); + assert.eq(count, expectedEqualToNull.length); + let projectResults = coll.find({a: {$elemMatch: {$eq: null}}}, projectToOnlyA).toArray(); assert(resultsEq(projectResults, extractAValues(expectedEqualToNull)), tojson(projectResults)); @@ -467,25 +525,23 @@ function testNullSemantics(coll) { // value for "b". (function testDottedEqualsNull() { const noProjectResults = coll.find({"a.b": {$eq: null}}).toArray(); - assert( - resultsEq(noProjectResults, - [ - {_id: "a_empty_subobject", a: {}}, - {_id: "a_null", a: null}, - {_id: "a_number", a: 4}, - {_id: "a_subobject_b_null", a: {b: null}}, - {_id: "a_subobject_b_undefined", a: {b: undefined}}, - {_id: "a_undefined", a: undefined}, - {_id: "no_a"}, - { - _id: "a_object_array_all_b_nulls", - a: [{b: null}, {b: undefined}, {b: null}, {}] - }, - {_id: "a_object_array_some_b_nulls", a: [{b: null}, {b: 3}, {b: null}]}, - {_id: "a_object_array_some_b_undefined", a: [{b: undefined}, {b: 3}]}, - {_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]}, - ]), - tojson(noProjectResults)); + const expected = [ + {_id: "a_empty_subobject", a: {}}, + {_id: "a_null", a: null}, + {_id: "a_number", a: 4}, + {_id: "a_subobject_b_null", a: {b: null}}, + {_id: "a_subobject_b_undefined", a: {b: undefined}}, + {_id: "a_undefined", a: undefined}, + {_id: "no_a"}, + {_id: "a_object_array_all_b_nulls", a: [{b: null}, {b: undefined}, {b: null}, {}]}, + {_id: "a_object_array_some_b_nulls", a: [{b: null}, {b: 3}, {b: null}]}, + {_id: "a_object_array_some_b_undefined", a: [{b: undefined}, {b: 3}]}, + {_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]}, + ]; + assert(resultsEq(noProjectResults, expected), tojson(noProjectResults)); + + const count = coll.count({"a.b": {$eq: null}}); + assert.eq(count, expected.length); const projectResults = coll.find({"a.b": {$eq: null}}, projectToOnlyADotB).toArray(); assert(resultsEq(projectResults, @@ -508,18 +564,20 @@ function testNullSemantics(coll) { // Test the semantics of the query {"a.b": {$ne: null}}. (function testDottedNotEqualsNull() { const noProjectResults = coll.find({"a.b": {$ne: null}}).toArray(); - assert(resultsEq(noProjectResults, - [ - {_id: "a_subobject_b_not_null", a: {b: "hi"}}, - {_id: "a_double_array", a: [[]]}, - {_id: "a_empty_array", a: []}, - {_id: "a_object_array_no_b_nulls", a: [{b: 1}, {b: 3}, {b: "string"}]}, - {_id: "a_value_array_all_nulls", a: [null, null]}, - {_id: "a_value_array_no_nulls", a: [1, "string", 4]}, - {_id: "a_value_array_with_null", a: [1, "string", null, 4]}, - {_id: "a_value_array_with_undefined", a: [1, "string", undefined, 4]} - ]), - tojson(noProjectResults)); + const expected = [ + {_id: "a_subobject_b_not_null", a: {b: "hi"}}, + {_id: "a_double_array", a: [[]]}, + {_id: "a_empty_array", a: []}, + {_id: "a_object_array_no_b_nulls", a: [{b: 1}, {b: 3}, {b: "string"}]}, + {_id: "a_value_array_all_nulls", a: [null, null]}, + {_id: "a_value_array_no_nulls", a: [1, "string", 4]}, + {_id: "a_value_array_with_null", a: [1, "string", null, 4]}, + {_id: "a_value_array_with_undefined", a: [1, "string", undefined, 4]} + ]; + assert(resultsEq(noProjectResults, expected), tojson(noProjectResults)); + + const count = coll.count({"a.b": {$ne: null}}); + assert.eq(count, expected.length); const projectResults = coll.find({"a.b": {$ne: null}}, projectToOnlyADotB).toArray(); assert(resultsEq(projectResults, @@ -554,6 +612,9 @@ function testNullSemantics(coll) { {_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]}, ]; + const count = coll.count({"a.b": {$in: [null, 75]}}); + assert.eq(count, expected.length); + assert(resultsEq(noProjectResults, expected), noProjectResults); }()); @@ -575,6 +636,9 @@ function testNullSemantics(coll) { {_id: "a_object_array_some_b_missing", a: [{b: 3}, {}]}, ]; + const count = coll.count({$or: [{"a.b": null}, {"a.b": 75}]}); + assert.eq(count, expected.length); + assert(resultsEq(noProjectResults, expected), noProjectResults); }()); @@ -595,6 +659,9 @@ function testNullSemantics(coll) { assert(resultsEq(noProjectResults, expected), noProjectResults); + const count = coll.count({"a.b": {$nin: [null, 75]}}); + assert.eq(count, expected.length); + const projectResults = coll.find(query, projectToOnlyA).toArray(); assert(resultsEq(projectResults, extractAValues(expected)), projectResults); }()); @@ -613,6 +680,9 @@ function testNullSemantics(coll) { {_id: "a_value_array_with_undefined", a: [1, "string", undefined, 4]}, ]; + const count = coll.count({"a.b": {$nin: [null, /^str.*/]}}); + assert.eq(count, expected.length); + assert(resultsEq(noProjectResults, expected), noProjectResults); const projectResults = coll.find(query, projectToOnlyA).toArray(); @@ -625,6 +695,9 @@ function testNullSemantics(coll) { let results = coll.find({"a.b": {$elemMatch: {$eq: null}}}).toArray(); assert(resultsEq(results, []), tojson(results)); + const count = coll.count({"a.b": {$elemMatch: {$eq: null}}}); + assert.eq(count, 0); + results = coll.find({"a.b": {$elemMatch: {$ne: null}}}).toArray(); assert(resultsEq(results, []), tojson(results)); }()); @@ -642,6 +715,9 @@ function testNullSemantics(coll) { ]; assert(resultsEq(noProjectResults, expectedEqualToNull), tojson(noProjectResults)); + const count = coll.count({a: {$elemMatch: {b: {$eq: null}}}}); + assert.eq(count, expectedEqualToNull.length); + let projectResults = coll.find({a: {$elemMatch: {b: {$eq: null}}}}, projectToOnlyADotB).toArray(); assert(resultsEq(projectResults, diff --git a/src/mongo/db/matcher/expression_leaf.cpp b/src/mongo/db/matcher/expression_leaf.cpp index 31157666e92..990f91c1796 100644 --- a/src/mongo/db/matcher/expression_leaf.cpp +++ b/src/mongo/db/matcher/expression_leaf.cpp @@ -434,6 +434,7 @@ std::unique_ptr<MatchExpression> InMatchExpression::shallowClone() const { } next->_hasNull = _hasNull; next->_hasEmptyArray = _hasEmptyArray; + next->_hasNonEmptyArrayOrObject = _hasNonEmptyArrayOrObject; next->_equalitySet = _equalitySet; next->_originalEqualityVector = _originalEqualityVector; next->_equalityStorage = _equalityStorage; @@ -577,6 +578,8 @@ Status InMatchExpression::setEqualities(std::vector<BSONElement> equalities) { _hasNull = true; } else if (equality.type() == BSONType::Array && equality.Obj().isEmpty()) { _hasEmptyArray = true; + } else if (equality.type() == BSONType::Array || equality.type() == BSONType::Object) { + _hasNonEmptyArrayOrObject = true; } } diff --git a/src/mongo/db/matcher/expression_leaf.h b/src/mongo/db/matcher/expression_leaf.h index 46a80aa5e91..a00ed9e31f1 100644 --- a/src/mongo/db/matcher/expression_leaf.h +++ b/src/mongo/db/matcher/expression_leaf.h @@ -723,6 +723,14 @@ public: return _hasEmptyArray; } + bool hasNonEmptyArrayOrObject() const { + return _hasNonEmptyArrayOrObject; + } + + bool hasNonScalarOrNonEmptyValues() const { + return hasNonEmptyArrayOrObject() || hasNull() || hasRegex(); + } + void acceptVisitor(MatchExpressionMutableVisitor* visitor) final { visitor->visit(this); } @@ -748,6 +756,9 @@ private: // Whether or not '_equalities' has an empty array element in it. bool _hasEmptyArray = false; + // Whether or not '_equalities' has a non-empty array or object element in it. + bool _hasNonEmptyArrayOrObject = false; + // Collator used to construct '_eltCmp'; const CollatorInterface* _collator = nullptr; diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index 01e13b5f058..3c027bde140 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -108,13 +108,38 @@ Interval makeNullPointInterval(bool isHashed) { return isHashed ? kHashedNullInterval : IndexBoundsBuilder::kNullPointInterval; } +/** + * This helper updates the query bounds tightness for the limited set of conditions where we see a + * null query that can be covered. + */ +void updateTightnessForNullQuery(const IndexEntry& index, + IndexBoundsBuilder::BoundsTightness* tightnessOut) { + if (index.sparse || index.type == IndexType::INDEX_HASHED) { + // Sparse indexes and hashed indexes require a FETCH stage with a filter for null queries. + *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; + return; + } + + if (index.multikey) { + // If we have a simple equality null query and our index is multikey, we cannot cover the + // query. This is because null intervals are translated into the null and undefined point + // intervals, and the undefined point interval includes entries for []. In the case of a + // single null interval, [] should not match. + *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; + return; + } + + // The query may be fully covered by the index if the projection allows it, since the case above + // about the empty array can only become an issue if there is an empty array present, which + // would mark the index as multikey. + *tightnessOut = IndexBoundsBuilder::EXACT_MAYBE_COVERED; +} + void makeNullEqualityBounds(const IndexEntry& index, bool isHashed, OrderedIntervalList* oil, IndexBoundsBuilder::BoundsTightness* tightnessOut) { - // An equality to null predicate cannot be covered because the index does not distinguish - // between the lack of a value and the literal value null. - *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; + updateTightnessForNullQuery(index, tightnessOut); // There are two values that could possibly be equal to null in an index: undefined and null. oil->intervals.push_back(makeUndefinedPointInterval(isHashed)); @@ -254,7 +279,10 @@ bool IndexBoundsBuilder::canUseCoveredMatching(const MatchExpression* expr, IndexBoundsBuilder::BoundsTightness tightness; OrderedIntervalList oil; translate(expr, BSONElement{}, index, &oil, &tightness, /* iet::Builder */ nullptr); - return tightness >= IndexBoundsBuilder::INEXACT_COVERED; + // We have additional tightness values (MAYBE_COVERED), but we cannot generally cover those + // cases unless we have an appropriate projection. + return tightness == IndexBoundsBuilder::INEXACT_COVERED || + tightness == IndexBoundsBuilder::EXACT; } // static @@ -404,6 +432,61 @@ const Interval IndexBoundsBuilder::kNullPointInterval = const Interval IndexBoundsBuilder::kEmptyArrayPointInterval = IndexBoundsBuilder::makePointInterval(kEmptyArrayElementObj); +bool detectIfEntireNullIntervalMatchesPredicate(const InMatchExpression* ime, + const IndexEntry& index) { + if (!ime->hasNull()) { + // This isn't a null query. + return false; + } + + if (index.sparse || (IndexType::INDEX_HASHED == index.type)) { + // Sparse indexes and hashed indexes still require a FETCH stage with a filter for null + // queries. + return false; + } + + // Given the context of having a null $in query with eligible indexes, we may be able to cover + // some combinations of intervals that we could not cover individually. + if (index.multikey) { + // If the path has multiple components and we have a multikey index, we still need a FETCH + // in order to defend against cases where we have a multikey index on "a". These documents + // will generate null index keys: {"a.b": null} and {a: [1,2,3]}. However, a query like + // {"a.b": {$in: [null, []]}} should not match {a: [1, 2, 3]}. + // TODO SERVER-71021: it may be possible to cover more cases here. + if (ime->fieldRef()->numParts() > 1) { + return false; + } + + // We must have an equality to an empty array for this null query to be covered, otherwise, + // because we generate both null and undefined point intervals for a null query, and because + // a multikey index reuses the same entry for [] and undefined, we will not be able to cover + // the query. + if (!ime->hasEmptyArray()) { + return false; + } + } + + return true; +} + +void IndexBoundsBuilder::_mergeTightness(const BoundsTightness& tightness, + BoundsTightness& tightnessOut) { + // There is a special case where we may have a covered null query (EXACT_MAYBE_COVERED) and a + // regex with inexact bounds that doesn't need a FETCH (INEXACT_COVERED). In this case, we want + // to update the tightness to INEXACT_MAYBE_COVERED, to indicate that we need to check if the + // projection allows us to cover the query, but ensure that we will have a filter on the index + // if it turns out we can. + if (((tightness == BoundsTightness::EXACT_MAYBE_COVERED) && + (tightnessOut == BoundsTightness::INEXACT_COVERED)) || + ((tightness == BoundsTightness::INEXACT_COVERED) && + (tightnessOut == BoundsTightness::EXACT_MAYBE_COVERED))) { + tightnessOut = BoundsTightness::INEXACT_MAYBE_COVERED; + } else if (tightness < tightnessOut) { + // Otherwise, fallback to picking the new tightness if it is looser than the old tightness. + tightnessOut = tightness; + } +} + void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, const BSONElement& elt, const IndexEntry& index, @@ -955,51 +1038,45 @@ void IndexBoundsBuilder::_translatePredicate(const MatchExpression* expr, }); const InMatchExpression* ime = static_cast<const InMatchExpression*>(expr); - *tightnessOut = IndexBoundsBuilder::EXACT; // Create our various intervals. IndexBoundsBuilder::BoundsTightness tightness; - bool arrayOrNullPresent = false; + // We check if the $in predicate satisfies conditions to be a covered null predicate on the + // basis of indexes, null intervals, and array intervals. + const bool entireNullIntervalMatchesPredicate = + detectIfEntireNullIntervalMatchesPredicate(ime, index); for (auto&& equality : ime->getEqualities()) { - translateEquality(equality, index, isHashed, oilOut, &tightness); - // The ordering invariant of oil has been violated by the call to translateEquality. - arrayOrNullPresent = arrayOrNullPresent || equality.type() == BSONType::jstNULL || - equality.type() == BSONType::Array; - if (tightness != IndexBoundsBuilder::EXACT) { - *tightnessOut = tightness; + // First, we generate the bounds the same way that we would do for an individual + // equality. This will set tightness to the value it should be if this equality is being + // considered in isolation. + IndexBoundsBuilder::translateEquality(equality, index, isHashed, oilOut, &tightness); + if (entireNullIntervalMatchesPredicate && + (BSONType::jstNULL == equality.type() || + (BSONType::Array == equality.type() && equality.Obj().isEmpty()))) { + // We may have a covered null query. In this case, we update both empty array and + // null interval tightness to EXACT_MAYBE_COVERED, as individually they would have a + // tightness of INEXACT_FETCH. However, we already know we will be able to cover + // these intervals together if we have appropriate projections. Note that any other + // intervals that cannot be covered may still require the query to use a FETCH. + tightness = IndexBoundsBuilder::EXACT_MAYBE_COVERED; } + IndexBoundsBuilder::_mergeTightness(tightness, *tightnessOut); } for (auto&& regex : ime->getRegexes()) { translateRegex(regex.get(), index, oilOut, &tightness); - if (tightness != IndexBoundsBuilder::EXACT) { - *tightnessOut = tightness; - } - } - - if (ime->hasNull()) { - // A null index key does not always match a null query value so we must fetch the - // doc and run a full comparison. See SERVER-4529. - // TODO: Do we already set the tightnessOut by calling translateEquality? - *tightnessOut = INEXACT_FETCH; - } - - if (ime->hasEmptyArray()) { - // Empty arrays are indexed as undefined. - BSONObjBuilder undefinedBob; - undefinedBob.appendUndefined(""); - oilOut->intervals.push_back(makePointInterval(undefinedBob.obj())); - *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; + IndexBoundsBuilder::_mergeTightness(tightness, *tightnessOut); } // Equalities are already sorted and deduped so unionize is unneccesary if no regexes // are present. Hashed indexes may also cause the bounds to be out-of-order. - // Arrays and nulls introduce multiple elements that neccesitate a sort and deduping. - if (!ime->getRegexes().empty() || index.type == IndexType::INDEX_HASHED || - arrayOrNullPresent) + // Arrays and nulls introduce multiple elements that necessitate a sort and deduping. + if (ime->hasNonScalarOrNonEmptyValues() || index.type == IndexType::INDEX_HASHED) { unionize(oilOut); + } + } else if (MatchExpression::GEO == expr->matchType()) { const GeoMatchExpression* gme = static_cast<const GeoMatchExpression*>(expr); if ("2dsphere" == elt.valueStringDataSafe()) { @@ -1315,6 +1392,7 @@ void IndexBoundsBuilder::translateEquality(const BSONElement& data, } std::sort(oil->intervals.begin(), oil->intervals.end(), IntervalComparison); + *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; } diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h index b48228328f5..d1067caa561 100644 --- a/src/mongo/db/query/index_bounds_builder.h +++ b/src/mongo/db/query/index_bounds_builder.h @@ -67,16 +67,28 @@ public: * increasing tightness. These values are used when we need to do comparison between two * BoundsTightness values. Such comparisons can answer questions such as "Does predicate * X have tighter or looser bounds than predicate Y?". + * + * These enum values are ordered from loosest to tightest. */ enum BoundsTightness { // Index bounds are inexact, and a fetch is required. INEXACT_FETCH = 0, - // Index bounds are inexact, but no fetch is required - INEXACT_COVERED = 1, + // Index bounds are inexact, and a fetch may be required depending on the projection. + // For example, a count $in query on null + a regex can be covered, but a find query with + // the same filter and no projection cannot. + INEXACT_MAYBE_COVERED = 1, + + // Index bounds are exact, but a fetch may be required depending on the projection. + // For example, a find query on null may be covered, depending on which fields we project + // out. + EXACT_MAYBE_COVERED = 2, + + // Index bounds are inexact, but no fetch is required. + INEXACT_COVERED = 3, // Index bounds are exact. - EXACT = 2 + EXACT = 4 }; /** @@ -301,6 +313,11 @@ private: OrderedIntervalList* oilOut, BoundsTightness* tightnessOut, interval_evaluation_tree::Builder* ietBuilder); + + /** + * Helper method for merging interval tightness for $in expressions. + */ + static void _mergeTightness(const BoundsTightness& tightness, BoundsTightness& tightnessOut); }; } // namespace mongo diff --git a/src/mongo/db/query/index_bounds_builder_eq_null_test.cpp b/src/mongo/db/query/index_bounds_builder_eq_null_test.cpp index af4cdf91303..31e17b6e2c8 100644 --- a/src/mongo/db/query/index_bounds_builder_eq_null_test.cpp +++ b/src/mongo/db/query/index_bounds_builder_eq_null_test.cpp @@ -48,7 +48,7 @@ void assertBoundsRepresentEqualsNull(const OrderedIntervalList& oil) { oil.intervals[1].compare(Interval(fromjson("{'': null, '': null}"), true, true))); } -TEST_F(IndexBoundsBuilderTest, TranslateExprEqualToNullIsInexactFetch) { +TEST_F(IndexBoundsBuilderTest, TranslateExprEqualToNullIsExactMaybeCovered) { BSONObj keyPattern = BSON("a" << 1); BSONElement elt = keyPattern.firstElement(); auto testIndex = buildSimpleIndexEntry(keyPattern); @@ -65,11 +65,11 @@ TEST_F(IndexBoundsBuilderTest, TranslateExprEqualToNullIsInexactFetch) { oil.intervals[0].compare(Interval(fromjson("{'': undefined, '': undefined}"), true, true))); ASSERT_EQUALS(Interval::INTERVAL_EQUALS, oil.intervals[1].compare(Interval(fromjson("{'': null, '': null}"), true, true))); - ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT_MAYBE_COVERED); assertIET(inputParamIdMap, ietBuilder, elt, testIndex, oil); } -TEST_F(IndexBoundsBuilderTest, TranslateEqualsToNullShouldBuildInexactBounds) { +TEST_F(IndexBoundsBuilderTest, TranslateEqualsToNullShouldBuildExactMaybeCoveredBounds) { BSONObj indexPattern = BSON("a" << 1); auto testIndex = buildSimpleIndexEntry(indexPattern); @@ -83,12 +83,12 @@ TEST_F(IndexBoundsBuilderTest, TranslateEqualsToNullShouldBuildInexactBounds) { expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness, &ietBuilder); ASSERT_EQUALS(oil.name, "a"); - ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT_MAYBE_COVERED); assertBoundsRepresentEqualsNull(oil); assertIET(inputParamIdMap, ietBuilder, indexPattern.firstElement(), testIndex, oil); } -TEST_F(IndexBoundsBuilderTest, TranslateDottedEqualsToNullShouldBuildInexactBounds) { +TEST_F(IndexBoundsBuilderTest, TranslateDottedEqualsToNullShouldBuildExactMaybeCoveredBounds) { BSONObj indexPattern = BSON("a.b" << 1); auto testIndex = buildSimpleIndexEntry(indexPattern); @@ -102,7 +102,9 @@ TEST_F(IndexBoundsBuilderTest, TranslateDottedEqualsToNullShouldBuildInexactBoun expr.get(), indexPattern.firstElement(), testIndex, &oil, &tightness, &ietBuilder); ASSERT_EQUALS(oil.name, "a.b"); - ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + // Depending on the query projection, this will either be converted to EXACT or to INEXACT_FETCH + // before we build an IXSCAN plan. + ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT_MAYBE_COVERED); assertBoundsRepresentEqualsNull(oil); assertIET(inputParamIdMap, ietBuilder, indexPattern.firstElement(), testIndex, oil); } diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp index e05dfd8a5d1..dd75cbce48d 100644 --- a/src/mongo/db/query/planner_access.cpp +++ b/src/mongo/db/query/planner_access.cpp @@ -1114,47 +1114,13 @@ std::vector<std::unique_ptr<QuerySolutionNode>> QueryPlannerAccess::collapseEqui } /** - * Returns true if this is a null query that can retrieve all the information it needs directly from - * the index, and so does not need a FETCH stage on top of it. Returns false otherwise. + * This helper determines if a query can be covered depending on the query projection. */ -bool isCoveredNullQuery(const CanonicalQuery& query, - MatchExpression* root, - IndexTag* tag, - const vector<IndexEntry>& indices, - const QueryPlannerParams& params) { - // Sparse indexes and hashed indexes should not use this optimization as they will require a - // FETCH stage with a filter. - if (indices[tag->index].sparse || indices[tag->index].type == IndexType::INDEX_HASHED) { - return false; - } - - // When the index is not multikey, we can support a query on an indexed field searching for null - // values. This optimization can only be done when the index is not multikey, otherwise empty - // arrays in the collection will be treated as null/undefined by the index. When the index is - // multikey, we can support a query searching for both null and empty array values. - const auto multikeyIndex = indices[tag->index].multikey; - if (root->matchType() == MatchExpression::MatchType::MATCH_IN) { - // Check that the query matches null values, if the index is not multikey, or null and empty - // array values, if the index is multikey. Note that the query may match values other than - // null (and empty array). - const auto node = static_cast<const InMatchExpression*>(root); - if (!node->hasNull() || (multikeyIndex && !node->hasEmptyArray())) { - return false; - } - } else if (ComparisonMatchExpressionBase::isEquality(root->matchType()) && !multikeyIndex) { - // Check that the query matches null values. - const auto node = static_cast<const ComparisonMatchExpressionBase*>(root); - if (node->getData().type() != BSONType::jstNULL) { - return false; - } - } else { - return false; - } - +bool projNeedsFetch(const CanonicalQuery& query, const QueryPlannerParams& params) { // If nothing is being projected, the query is fully covered without a fetch. // This is trivially true for a count query. if (params.options & QueryPlannerParams::Options::IS_COUNT) { - return true; + return false; } // This optimization can only be used for find when the index covers the projection completely. @@ -1163,7 +1129,7 @@ bool isCoveredNullQuery(const CanonicalQuery& query, // in the multikey case). Hence, only find queries projecting _id are covered. auto proj = query.getProj(); if (!proj) { - return false; + return true; } // We can cover projections on _id and generated fields and expressions depending only on _id. @@ -1175,10 +1141,38 @@ bool isCoveredNullQuery(const CanonicalQuery& query, // Note that it is not possible to project onto dotted paths of _id here, since they may be // null or missing, and the index cannot differentiate between the two cases, so we would // still need a FETCH stage. - return projFields.size() == 1 && *projFields.begin() == "_id"; + if (projFields.size() == 1 && *projFields.begin() == "_id") { + return false; + } } - return false; + return true; +} + +/** + * This helper updates a MAYBE_COVERED query tightness to one of EXACT, INEXACT_COVERED, or + * INEXACT_FETCH, depending on whether we need a FETCH/filter to answer the query projection. + */ +void refineTightnessForMaybeCoveredQuery(const CanonicalQuery& query, + const QueryPlannerParams& params, + IndexBoundsBuilder::BoundsTightness& tightnessOut) { + // We need to refine the tightness in case we have a "MAYBE_COVERED" tightness bound which + // depends on the query's projection. We will not have information about the projection + // later on in order to make this determination, so we do it here. + const bool noFetchNeededForProj = !projNeedsFetch(query, params); + if (tightnessOut == IndexBoundsBuilder::EXACT_MAYBE_COVERED) { + if (noFetchNeededForProj) { + tightnessOut = IndexBoundsBuilder::EXACT; + } else { + tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; + } + } else if (tightnessOut == IndexBoundsBuilder::INEXACT_MAYBE_COVERED) { + if (noFetchNeededForProj) { + tightnessOut = IndexBoundsBuilder::INEXACT_COVERED; + } else { + tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; + } + } } bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query, @@ -1222,11 +1216,6 @@ bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query, // If we're here, we now know that 'child' can use an index directly and the index is // over the child's field. - // We need to track if this is a covered null query so that we can have this information - // at hand when handling the filter on an indexed AND. - scanState.isCoveredNullQuery = - isCoveredNullQuery(query, child, scanState.ixtag, indices, params); - // If 'child' is a NOT, then the tag we're interested in is on the NOT's // child node. if (MatchExpression::NOT == child->matchType()) { @@ -1259,6 +1248,7 @@ bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query, verify(scanState.currentIndexNumber == scanState.ixtag->index); scanState.tightness = IndexBoundsBuilder::INEXACT_FETCH; mergeWithLeafNode(child, &scanState); + refineTightnessForMaybeCoveredQuery(query, params, scanState.tightness); handleFilter(&scanState); } else { if (nullptr != scanState.currentScan.get()) { @@ -1278,6 +1268,7 @@ bool QueryPlannerAccess::processIndexScans(const CanonicalQuery& query, &scanState.tightness, scanState.getCurrentIETBuilder()); + refineTightnessForMaybeCoveredQuery(query, params, scanState.tightness); handleFilter(&scanState); } } @@ -1693,6 +1684,12 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::_buildIndexedDataAccess( return soln; } + // We may be able to avoid adding an extra fetch stage even though the bounds are + // inexact, for instance if the query is counting null values on an indexed field + // without projecting that field. We therefore convert "MAYBE_COVERED" bounds into + // either EXACT or INEXACT, depending on the query projection. + refineTightnessForMaybeCoveredQuery(query, params, tightness); + // If the bounds are exact, the set of documents that satisfy the predicate is // exactly equal to the set of documents that the scan provides. // @@ -1700,11 +1697,7 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::_buildIndexedDataAccess( // superset of documents that satisfy the predicate, and we must check the // predicate. - // We may also be able to avoid adding an extra fetch stage even though the bounds are - // inexact because the query is counting null values on an indexed field without - // projecting that field. - if (tightness == IndexBoundsBuilder::EXACT || - isCoveredNullQuery(query, root, tag, indices, params)) { + if (tightness == IndexBoundsBuilder::EXACT) { return soln; } else if (tightness == IndexBoundsBuilder::INEXACT_COVERED && !indices[tag->index].multikey) { @@ -1850,10 +1843,9 @@ void QueryPlannerAccess::handleFilterAnd(ScanBuildingState* scanState) { // should always be affixed as a filter. We keep 'curChild' in the $and // for affixing later. ++scanState->curChild; - } else if (scanState->tightness == IndexBoundsBuilder::EXACT || scanState->isCoveredNullQuery) { - // The tightness of the bounds is exact or we are dealing with a covered null query. - // Either way, we want to remove this child so that when control returns to handleIndexedAnd - // we know that we don't need it to create a FETCH stage. + } else if (scanState->tightness == IndexBoundsBuilder::EXACT) { + // The tightness of the bounds is exact. We want to remove this child so that when control + // returns to handleIndexedAnd we know that we don't need it to create a FETCH stage. root->getChildVector()->erase(root->getChildVector()->begin() + scanState->curChild); } else if (scanState->tightness == IndexBoundsBuilder::INEXACT_COVERED && (INDEX_TEXT == index.type || !index.multikey)) { diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h index 6ea44830415..bd9c856ffab 100644 --- a/src/mongo/db/query/planner_access.h +++ b/src/mongo/db/query/planner_access.h @@ -145,11 +145,9 @@ private: struct ScanBuildingState { ScanBuildingState(MatchExpression* theRoot, const std::vector<IndexEntry>& indexList, - bool inArrayOp, - bool isCoveredNull = false) + bool inArrayOp) : root(theRoot), inArrayOperator(inArrayOp), - isCoveredNullQuery(isCoveredNull), indices(indexList), currentScan(nullptr), curChild(0), @@ -188,9 +186,6 @@ private: // Are we inside an array operator such as $elemMatch or $all? bool inArrayOperator; - // Is this a covered null query? - bool isCoveredNullQuery; - // A list of relevant indices which 'root' may be tagged to use. const std::vector<IndexEntry>& indices; |