/** * Test to verify that null queries can be fully covered by an index. * @tags: [ * assumes_unsharded_collection, * requires_non_retryable_writes, * requires_fcv_62, * # This test could produce unexpected explain output if additional indexes are created. * assumes_no_implicit_index_creation, * # TODO SERVER-67506: Dotted path equality to null matches non-object array elements in CQF. * cqf_incompatible, * ] */ (function() { "use strict"; load("jstests/aggregation/extras/utils.js"); // For arrayEq(). load("jstests/libs/analyze_plan.js"); // For getAggPlanStages() and getPlanStages(). load("jstests/libs/clustered_collections/clustered_collection_util.js"); const coll = db.cover_null_queries; coll.drop(); assert.commandWorked(coll.insertMany([ {_id: 1, a: 1, b: 1}, {_id: 2, a: 1, b: null}, {_id: 3, a: null, b: 1}, {_id: 4, a: null, b: null}, {_id: 5, a: 2}, {_id: 6, b: 2}, {_id: 7}, ])); /** * Validates that the explain() of command 'cmdObj' has the stages in 'expectedStages'. * * The field 'expectedStages' should be specified as an object with keys matching the stages * expected in the explain output and values indicating how many times each stage should be expected * to appear in the output (useful to verify if there are two COUNT_SCANS or that there are 0 * IX_SCANS, for example). * * The field 'isAgg' is a boolean indicating whether or not the command is an aggregation. */ function validateStages({cmdObj, expectedStages, isAgg}) { const explainObj = assert.commandWorked(coll.runCommand({explain: cmdObj})); for (const [expectedStage, count] of Object.entries(expectedStages)) { const planStages = isAgg ? getAggPlanStages(explainObj, expectedStage, /* useQueryPlannerSection */ true) : getPlanStages(explainObj, expectedStage); assert.eq(planStages.length, count, {foundStages: planStages, explain: explainObj}); if (count > 0) { for (const planStage of planStages) { assert.eq(planStage.stage, expectedStage, planStage); } } } } /** * Runs find command with the given 'filter' and 'projection' and validates that the output returned * matches 'expectedOutput'. Also runs explain() command on the same find command and validates that * all the 'expectedStages' are present in the plan returned. */ function validateFindCmdOutputAndPlan({filter, projection, expectedStages, expectedOutput}) { const cmdObj = {find: coll.getName(), filter: filter, projection: projection}; // Compare index output with expected output. if (expectedOutput) { const res = assert.commandWorked(coll.runCommand(cmdObj)); const ouputArray = new DBCommandCursor(coll.getDB(), res).toArray(); assert(arrayEq(expectedOutput, ouputArray), ouputArray); } // Validate explain. validateStages({cmdObj, expectedStages}); // Verify that we get the same output as we expect without an index. const noIndexCmdObj = Object.assign(cmdObj, {hint: {$natural: 1}}); const resNoIndex = assert.commandWorked(coll.runCommand(noIndexCmdObj)); const noIndexOutArr = new DBCommandCursor(coll.getDB(), resNoIndex).toArray(); assert(arrayEq(expectedOutput, noIndexOutArr), noIndexOutArr); } /** * Runs count command with the 'filter' and validates that the output returned matches * 'expectedOutput'. Also runs explain() command and validates that all the 'expectedStages' * are present in the plan returned. */ function validateSimpleCountCmdOutputAndPlan({filter, expectedStages, expectedCount}) { // Compare index output with expected output. const cmdObj = {count: coll.getName(), query: filter}; const res = assert.commandWorked(coll.runCommand(cmdObj)); assert.eq(res.n, expectedCount); // Validate explain. validateStages({cmdObj, expectedStages}); // Verify that we get the same output with and without an index. const noIndexCmdObj = Object.assign(cmdObj, {hint: {$natural: 1}}); const resNoIndex = assert.commandWorked(coll.runCommand(noIndexCmdObj)); assert.eq(resNoIndex.n, expectedCount); } /** * Runs an aggregation with a $count stage with the 'filter' applied to the $match stage and * validates that the count returned matches 'expectedCount'. Also runs explain() command on the * and validates that all the 'expectedStages' are present in the plan returned. */ function validateCountAggCmdOutputAndPlan({filter, expectedStages, expectedCount, pipeline}) { const cmdObj = { aggregate: coll.getName(), pipeline: pipeline || [{$match: filter}, {$count: "count"}], cursor: {}, }; // Compare index output with expected output. const cmdRes = assert.commandWorked(coll.runCommand(cmdObj)); const countRes = cmdRes.cursor.firstBatch; assert.eq(countRes.length, 1, cmdRes); assert.eq(countRes[0].count, expectedCount, countRes); // Validate explain. validateStages({cmdObj, expectedStages, isAgg: true}); // Verify that we get the same output as we expect without an index. const noIndexCmdObj = Object.assign(cmdObj, {hint: {$natural: 1}}); const resNoIndex = assert.commandWorked(coll.runCommand(noIndexCmdObj)); const countResNoIndex = resNoIndex.cursor.firstBatch; assert.eq(countResNoIndex.length, 1, cmdRes); assert.eq(countResNoIndex[0].count, expectedCount, countRes); } /** * Same as above, but uses a $group count. */ function validateGroupCountAggCmdOutputAndPlan({filter, expectedStages, expectedCount}) { validateCountAggCmdOutputAndPlan({ expectedStages, expectedCount, pipeline: [{$match: filter}, {$group: {_id: 0, count: {$count: {}}}}] }); } function getExpectedStagesIndexScanAndFetch(extraStages) { const clustered = ClusteredCollectionUtil.areAllCollectionsClustered(db.getMongo()); const result = clustered ? {"CLUSTERED_IXSCAN": 1} : {"FETCH": 1, "IXSCAN": 1}; for (const stage in extraStages) { result[stage] = extraStages[stage]; } return result; } assert.commandWorked(coll.createIndex({a: 1, _id: 1})); // Verify count({a: null}) can be covered by an index. In the simplest case we can use two count // scans joined by an OR to evaluate it. validateSimpleCountCmdOutputAndPlan({ filter: {a: null}, expectedCount: 4, expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0}, }); // Verify $count stage in aggregation matching {a: null} yields the same plan. validateCountAggCmdOutputAndPlan({ filter: {a: null}, expectedCount: 4, expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0}, }); // Verify find({a: null}, {_id: 1}) can be covered by an index. validateFindCmdOutputAndPlan({ filter: {a: null}, projection: {_id: 1}, expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); // Verify that the above queries can be covered by an index when the predicate is a $in. These are // not supported by a COUNT_SCAN because they are not the strict null equality predicate. validateCountAggCmdOutputAndPlan({ filter: {a: {$in: [null, 2]}}, expectedCount: 5, expectedStages: {"IXSCAN": 1, "FETCH": 0}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: {$in: [null, 2]}}, expectedCount: 5, expectedStages: {"IXSCAN": 1, "FETCH": 0}, }); validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, 2]}}, projection: {_id: 1}, expectedOutput: [{_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}, {_id: 7}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); // Same as above, but using a different ordering in the $in clause. validateSimpleCountCmdOutputAndPlan({ filter: {a: {$in: [2, null]}}, expectedCount: 5, expectedStages: {"IXSCAN": 1, "FETCH": 0}, }); validateFindCmdOutputAndPlan({ filter: {a: {$in: [2, null]}}, projection: {_id: 1}, expectedOutput: [{_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}, {_id: 7}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); // We can cover a $in with null and an empty array predicate. validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, []]}}, projection: {_id: 1}, expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: {$in: [null, []]}}, expectedCount: 4, expectedStages: {"IXSCAN": 1, "FETCH": 0}, }); // We cannot cover a $in with null and an array predicate. // TODO SERVER-71058: It should be possible to cover this case and the more general case of matching // an array on a non-multikey index. validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, ["a"]]}}, projection: {_id: 1}, expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: {$in: [null, ["a"]]}}, expectedCount: 4, expectedStages: {"IXSCAN": 1, "FETCH": 1, "COUNT": 1}, }); // Verify that a more complex projection that only relies on the _id field does not need a FETCH. validateFindCmdOutputAndPlan({ filter: {a: null}, projection: {_id: 1, incr_id: {$add: [1, "$_id"]}}, expectedOutput: [{_id: 3, incr_id: 4}, {_id: 4, incr_id: 5}, {_id: 6, incr_id: 7}, {_id: 7, incr_id: 8}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_DEFAULT": 1}, }); // Verify that a more complex projection that computes a new field based on _id but excludes the _id // field does not require a fetch stage. validateFindCmdOutputAndPlan({ filter: {a: null}, projection: {_id: 0, incr_id: {$add: [1, "$_id"]}}, expectedOutput: [{incr_id: 4}, {incr_id: 5}, {incr_id: 7}, {incr_id: 8}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_DEFAULT": 1}, }); // Verify that a more complex projection that relies on any non-_id field does need a FETCH. validateFindCmdOutputAndPlan({ filter: {a: null}, projection: {_id: 1, incr_id: {$add: ["$a", "$_id"]}}, expectedOutput: [ {_id: 3, incr_id: null}, {_id: 4, incr_id: null}, {_id: 6, incr_id: null}, {_id: 7, incr_id: null} ], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_DEFAULT": 1}, }); // Verify that an exclusion projection does need a FETCH. validateFindCmdOutputAndPlan({ filter: {a: null}, projection: {a: 0, b: 0}, expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); // Verify find({a: null}, {_id: 1, b: 1}) is not covered by an index so we still have a FETCH stage. validateFindCmdOutputAndPlan({ filter: {a: null}, projection: {_id: 1, b: 1}, expectedOutput: [{_id: 3, b: 1}, {_id: 4, b: null}, {_id: 6, b: 2}, {_id: 7}], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, 2]}}, projection: {_id: 1, b: 1}, expectedOutput: [{_id: 3, b: 1}, {_id: 4, b: null}, {_id: 5}, {_id: 6, b: 2}, {_id: 7}], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); // Verify find({a: null}, {a: 1}) still has a FETCH stage because the index alone cannot determine // if the value of field a is null, undefined, or missing. validateFindCmdOutputAndPlan({ filter: {a: null}, projection: {a: 1}, expectedOutput: [{_id: 3, a: null}, {_id: 4, a: null}, {_id: 6}, {_id: 7}], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, 2]}}, projection: {a: 1}, expectedOutput: [{_id: 3, a: null}, {_id: 4, a: null}, {_id: 5, a: 2}, {_id: 6}, {_id: 7}], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); // For exclusion projects we always need a FETCH stage. validateFindCmdOutputAndPlan({ filter: {a: null}, projection: {a: 1, _id: 0}, expectedOutput: [{a: null}, {a: null}, {}, {}], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); // Verify that if the index is multikey, this optimization cannot be applied when just querying for // null values, as the index alone cannot differentiate between null and []. assert.commandWorked(coll.insertOne({_id: 8, a: []})); assert.commandWorked(coll.insertOne({_id: 9, a: [[]]})); assert.commandWorked(coll.insertOne({_id: 10, a: [null, []]})); validateSimpleCountCmdOutputAndPlan({ filter: {a: null}, expectedCount: 5, expectedStages: {"FETCH": 1, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0} }); validateCountAggCmdOutputAndPlan({ filter: {a: null}, expectedCount: 5, expectedStages: {"FETCH": 1, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0}, }); validateFindCmdOutputAndPlan({ filter: {a: null}, projection: {_id: 1}, expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 10}], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, 2]}}, projection: {_id: 1}, expectedOutput: [{_id: 3}, {_id: 4}, {_id: 5}, {_id: 6}, {_id: 7}, {_id: 10}], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: null, _id: 3}, expectedCount: 1, expectedStages: getExpectedStagesIndexScanAndFetch({"OR": 0, "COUNT_SCAN": 0}), }); validateCountAggCmdOutputAndPlan({ filter: {a: null, _id: 3}, expectedCount: 1, expectedStages: getExpectedStagesIndexScanAndFetch({"OR": 0, "COUNT_SCAN": 0}), }); validateFindCmdOutputAndPlan({ filter: {a: null, _id: 3}, projection: {_id: 1}, expectedOutput: [{_id: 3}], expectedStages: getExpectedStagesIndexScanAndFetch({"PROJECTION_SIMPLE": 1}), }); // Verify that if the index is multikey and the query searches for null and empty array values, then // the find does not require a FETCH stage, and a count can replace the IXSCAN with three // COUNT_SCANS joined by an OR. validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, []]}}, projection: {_id: 1}, expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 8}, {_id: 9}, {_id: 10}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateCountAggCmdOutputAndPlan({ filter: {a: {$in: [null, []]}}, expectedCount: 7, expectedStages: {"FETCH": 0, "IXSCAN": 0, "OR": 1, "COUNT_SCAN": 3}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: {$in: [null, []]}}, expectedCount: 7, expectedStages: {"OR": 1, "COUNT_SCAN": 3, "IXSCAN": 0, "FETCH": 0}, }); // Same as above, but using a different ordering in the $in clause. validateFindCmdOutputAndPlan({ filter: {a: {$in: [[], null]}}, projection: {_id: 1}, expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 8}, {_id: 9}, {_id: 10}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateCountAggCmdOutputAndPlan({ filter: {a: {$in: [[], null]}}, expectedCount: 7, expectedStages: {"FETCH": 0, "IXSCAN": 0, "OR": 1, "COUNT_SCAN": 3}, }); // Verify that the same optimization is supported when using $or syntax. validateFindCmdOutputAndPlan({ filter: {$or: [{a: null}, {a: []}]}, projection: {_id: 1}, expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 8}, {_id: 9}, {_id: 10}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateCountAggCmdOutputAndPlan({ filter: {$or: [{a: null}, {a: []}]}, expectedCount: 7, expectedStages: {"FETCH": 0, "IXSCAN": 0, "OR": 1, "COUNT_SCAN": 3}, }); validateSimpleCountCmdOutputAndPlan({ filter: {$or: [{a: null}, {a: []}]}, expectedCount: 7, expectedStages: {"OR": 1, "COUNT_SCAN": 3, "IXSCAN": 0, "FETCH": 0}, }); // Same as above, but using a different ordering in the $in clauses. validateFindCmdOutputAndPlan({ filter: {$or: [{a: []}, {a: null}]}, projection: {_id: 1}, expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 8}, {_id: 9}, {_id: 10}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateCountAggCmdOutputAndPlan({ filter: {$or: [{a: []}, {a: null}]}, expectedCount: 7, expectedStages: {"FETCH": 0, "IXSCAN": 0, "OR": 1, "COUNT_SCAN": 3}, }); // Verify that if the index is multikey and the query searches for null, empty array, and other // values, then it does not require a FETCH, but we cannot replace the IXSCAN with COUNTs. validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, [], 1]}}, projection: {_id: 1}, expectedOutput: [{_id: 1}, {_id: 2}, {_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 8}, {_id: 9}, {_id: 10}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateCountAggCmdOutputAndPlan({ filter: {a: {$in: [null, [], 1]}}, expectedCount: 9, expectedStages: {"FETCH": 0, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: {$in: [null, [], 1]}}, expectedCount: 9, expectedStages: {"OR": 0, "COUNT_SCAN": 0, "IXSCAN": 1, "FETCH": 0}, }); // Verify that if the index is multikey and the query searches for null and empty array values, we // still fetch when projecting a field other than _id. validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, []]}}, projection: {_id: 1, a: 1}, expectedOutput: [ {_id: 3, a: null}, {_id: 4, a: null}, {_id: 6}, {_id: 7}, {_id: 8, a: []}, {_id: 9, a: [[]]}, {_id: 10, a: [[], null]} ], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, []]}}, projection: {_id: 1, b: 1}, expectedOutput: [ {_id: 3, b: 1}, {_id: 4, b: null}, {_id: 6, b: 2}, {_id: 7}, {_id: 8}, {_id: 9}, {_id: 10} ], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); assert.commandWorked(coll.dropIndexes()); assert.commandWorked(coll.createIndex({a: 1, b: 1, _id: 1})); // Verify that if the index is multikey and compound and the query matches null and empty array // values, then it does not require a FETCH stage and can replace the IXSCAN with COUNTs. validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, []]}}, projection: {_id: 1}, expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}, {_id: 8}, {_id: 9}, {_id: 10}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateCountAggCmdOutputAndPlan({ filter: {a: {$in: [null, []]}}, expectedCount: 7, expectedStages: {"FETCH": 0, "IXSCAN": 0, "OR": 1, "COUNT_SCAN": 3}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: {$in: [null, []]}}, expectedCount: 7, expectedStages: {"OR": 1, "COUNT_SCAN": 3, "IXSCAN": 0, "FETCH": 0}, }); // Verify that if the index is multikey and compound and the query has an added compound predicate, // then it does not require a FETCH and can replace the IXSCAN with COUNTs. validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, []]}, b: {$eq: 2}}, projection: {_id: 1}, expectedOutput: [{_id: 6}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateCountAggCmdOutputAndPlan({ filter: {a: {$in: [null, []]}, b: {$eq: 2}}, expectedCount: 1, expectedStages: {"FETCH": 0, "IXSCAN": 0, "OR": 1, "COUNT_SCAN": 3}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: {$in: [null, []]}, b: {$eq: 2}}, expectedCount: 1, expectedStages: {"OR": 1, "COUNT_SCAN": 3, "IXSCAN": 0, "FETCH": 0}, }); validateFindCmdOutputAndPlan({ filter: {a: 1, b: {$in: [null, []]}}, projection: {_id: 1}, expectedOutput: [{_id: 2}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateCountAggCmdOutputAndPlan({ filter: {a: 1, b: {$in: [null, []]}}, expectedCount: 1, expectedStages: {"FETCH": 0, "IXSCAN": 0, "OR": 1, "COUNT_SCAN": 3}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: 1, b: {$in: [null, []]}}, expectedCount: 1, expectedStages: {"OR": 1, "COUNT_SCAN": 3, "IXSCAN": 0, "FETCH": 0}, }); // Verify if the index is multikey and compound and the query searches for null, empty array, and // other values, then it does not require a FETCH, but we cannot replace the IXSCAN with COUNTs. validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, [], 1]}, b: {$eq: 2}}, projection: {_id: 1}, expectedOutput: [{_id: 6}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateCountAggCmdOutputAndPlan({ filter: {a: {$in: [null, [], 1]}, b: {$eq: 2}}, expectedCount: 1, expectedStages: {"FETCH": 0, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: {$in: [null, [], 1]}, b: {$eq: 2}}, expectedCount: 1, expectedStages: {"OR": 0, "COUNT_SCAN": 0, "IXSCAN": 1, "FETCH": 0}, }); // Same as above, but using a different ordering in the $in clauses. validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, 1, []]}, b: {$eq: 2}}, projection: {_id: 1}, expectedOutput: [{_id: 6}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateCountAggCmdOutputAndPlan({ filter: {a: {$in: [null, 1, []]}, b: {$eq: 2}}, expectedCount: 1, expectedStages: {"FETCH": 0, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0}, }); // Verify if the index is multikey and compound and the query searches for null, empty array, and // other values for multiple fields, then it does not require a FETCH but cannot replace the IXSCAN // with COUNTs because there are multiple null intervals or because the intervals are complex. validateCountAggCmdOutputAndPlan({ filter: {a: {$in: [null, []]}, b: {$in: [null, []]}}, expectedCount: 5, expectedStages: {"FETCH": 0, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0}, }); validateCountAggCmdOutputAndPlan({ filter: {a: {$in: [null, [], 1]}, b: {$in: [null, []]}}, expectedCount: 6, expectedStages: {"FETCH": 0, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0}, }); validateCountAggCmdOutputAndPlan({ filter: {a: {$in: [null, []]}, b: {$in: [null, [], 4]}}, expectedCount: 5, expectedStages: {"FETCH": 0, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0}, }); validateCountAggCmdOutputAndPlan({ filter: {a: {$in: [null, [], 1]}, b: {$in: [null, [], 4]}}, expectedCount: 6, expectedStages: {"FETCH": 0, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0}, }); // Verify if the index is multikey and compound, the query predicate must match null and empty array // values for all queried fields. validateCountAggCmdOutputAndPlan({ filter: {a: {$in: [null, []]}, b: null}, expectedCount: 5, expectedStages: {"FETCH": 1, "IXSCAN": 1, "OR": 0, "COUNT_SCAN": 0}, }); assert.commandWorked(coll.deleteMany({_id: {$in: [8, 9, 10]}})); // Same as above but when the index is not multikey. In this case, we can cover the query, but we // cannot do the COUNT_SCAN optimization because there are multiple null intervals. assert.commandWorked(coll.dropIndexes()); assert.commandWorked(coll.createIndex({a: 1, b: 1, _id: 1})); validateFindCmdOutputAndPlan({ filter: {a: {$in: [null, []]}, b: null}, projection: {_id: 1}, expectedOutput: [{_id: 4}, {_id: 7}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: {$in: [null, []]}, b: null}, expectedCount: 2, expectedStages: {"IXSCAN": 1, "FETCH": 0}, }); // Test case when query is fully covered but we still need to fetch to correctly project a field. assert.commandWorked(coll.dropIndexes()); assert.commandWorked(coll.createIndex({a: 1, _id: 1})); assert.commandWorked(coll.createIndex({a: 1, b: 1, _id: 1})); validateFindCmdOutputAndPlan({ filter: {a: null}, projection: {_id: 1, b: 1}, expectedOutput: [ {_id: 3, b: 1}, {_id: 4, b: null}, {_id: 6, b: 2}, {_id: 7}, ], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); // Test case when query is fully covered but predicate is not a single interval. assert.commandWorked(coll.dropIndexes()); assert.commandWorked(coll.createIndex({a: 1, b: 1, _id: 1})); validateFindCmdOutputAndPlan({ filter: {a: {$in: [1, 2, 3]}, b: null}, projection: {_id: 1}, expectedOutput: [{_id: 2}, {_id: 5}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); // Note that we can't use a COUNT_SCAN here because we have a complex interval. validateSimpleCountCmdOutputAndPlan({ filter: {a: {$in: [1, 2, 3]}, b: null}, expectedCount: 2, expectedStages: {"IXSCAN": 1, "FETCH": 0}, }); validateCountAggCmdOutputAndPlan({ filter: {a: {$in: [1, 2, 3]}, b: null}, expectedCount: 2, expectedStages: {"IXSCAN": 1, "FETCH": 0}, }); validateFindCmdOutputAndPlan({ filter: {a: 1, b: null}, projection: {_id: 1}, expectedOutput: [{_id: 2}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: 1, b: null}, expectedCount: 1, expectedStages: {"OR": 1, "COUNT_SCAN": 2, "FETCH": 0}, }); validateCountAggCmdOutputAndPlan({ filter: {a: 1, b: null}, expectedCount: 1, expectedStages: {"OR": 1, "COUNT_SCAN": 2, "FETCH": 0}, }); validateFindCmdOutputAndPlan({ filter: {a: 1, b: {$in: [null, 1]}}, projection: {_id: 1}, expectedOutput: [{_id: 1}, {_id: 2}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: 1, b: {$in: [null, 1]}}, expectedCount: 2, expectedStages: {"IXSCAN": 1, "FETCH": 0}, }); validateCountAggCmdOutputAndPlan({ filter: {a: 1, b: {$in: [null, 1]}}, expectedCount: 2, expectedStages: {"IXSCAN": 1, "FETCH": 0}, }); // Test case when counting nulls where documents are sorted in the opposite direction as the index. assert.commandWorked(coll.dropIndexes()); assert.commandWorked(coll.createIndex({a: -1, b: -1})); validateCountAggCmdOutputAndPlan({ expectedCount: 2, expectedStages: {"OR": 1, "COUNT_SCAN": 2, "FETCH": 0}, pipeline: /* Sort by field a in the opposite direction of the index. */ [{$match: {a: null, b: {$gt: 0}}}, {$sort: {a: 1}}, {$count: "count"}], }); // Test case when query is fully covered, predicate is not a single interval, and the index does not // include the _id field. A find projection in this case will not be covered, but any count should // be covered. assert.commandWorked(coll.dropIndexes()); assert.commandWorked(coll.createIndex({a: 1, b: 1})); validateFindCmdOutputAndPlan({ filter: {a: {$in: [1, 2, 3]}, b: null}, projection: {_id: 1}, expectedOutput: [{_id: 2}, {_id: 5}], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); // Note that we can't use a COUNT_SCAN here because we have a complex interval. validateSimpleCountCmdOutputAndPlan({ filter: {a: {$in: [1, 2, 3]}, b: null}, expectedCount: 2, expectedStages: {"IXSCAN": 1, "FETCH": 0}, }); validateCountAggCmdOutputAndPlan({ filter: {a: {$in: [1, 2, 3]}, b: null}, expectedCount: 2, expectedStages: {"IXSCAN": 1, "FETCH": 0}, }); // Test index intersection plan. assert.commandWorked(coll.dropIndexes()); assert.commandWorked(coll.createIndex({a: 1, _id: 1})); assert.commandWorked(coll.createIndex({b: 1, _id: 1})); validateFindCmdOutputAndPlan({ filter: {a: null, b: null}, projection: {_id: 1}, expectedOutput: [{_id: 4}, {_id: 7}], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); validateFindCmdOutputAndPlan({ filter: {a: null, b: 1}, projection: {_id: 1}, expectedOutput: [{_id: 3}], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); // Verify the case where id field is accessed along a dotted path. // We still need a FETCH in this case because the index cannot differentiate between null and // missing values within _id, e.g. {_id: {x: null}} vs. {_id: {}} would both be returned as // {_id: {x: null}} by the index. assert.commandWorked(coll.dropIndexes()); assert.commandWorked(coll.deleteMany({})); assert.commandWorked(coll.createIndex({a: 1, "_id.x": 1})); assert.commandWorked(coll.insertMany([ {a: null, _id: {x: 1}}, {a: null, _id: {x: 1, y: 1}}, {a: null, _id: {y: 1}}, {_id: {x: 1, y: 2}}, {a: "not null", _id: {x: 3}}, ])); validateFindCmdOutputAndPlan({ filter: {a: null}, projection: {"_id.x": 1}, expectedOutput: [{_id: {x: 1}}, {_id: {x: 1}}, {_id: {}}, {_id: {x: 1}}], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_DEFAULT": 1}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: null}, expectedCount: 4, expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0}, }); validateCountAggCmdOutputAndPlan({ filter: {a: null}, expectedCount: 4, expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0}, }); // Validate that we can use the optimization when we have regex without array elements in a $in or // $or. See SERVER-70436 for more details. coll.drop(); assert.commandWorked(coll.insertMany([ {_id: 1, a: '123456'}, {_id: 2, a: '1234567'}, {_id: 3, a: ' 12345678'}, {_id: 4, a: '444456'}, {_id: 5, a: ''}, {_id: 6, a: null}, {_id: 7}, ])); assert.commandWorked(coll.createIndex({a: 1, _id: 1})); // TODO SERVER-70998: Can apply optimization in case without regex; however, we still can't use a // COUNT_SCAN in this case. validateFindCmdOutputAndPlan({ filter: {$or: [{a: null}, {a: ""}]}, projection: {_id: 1}, expectedOutput: [{_id: 5}, {_id: 6}, {_id: 7}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateSimpleCountCmdOutputAndPlan({ filter: {$or: [{a: null}, {a: ""}]}, expectedCount: 3, expectedStages: {"COUNT": 1, "IXSCAN": 1, "FETCH": 0}, }); validateGroupCountAggCmdOutputAndPlan({ filter: {$or: [{a: null}, {a: ""}]}, expectedCount: 3, expectedStages: {"IXSCAN": 1, "FETCH": 0}, }); // Can still apply optimization when we have regex. validateFindCmdOutputAndPlan({ filter: {$or: [{a: null}, {a: {$regex: "^$"}}]}, projection: {_id: 1}, expectedOutput: [{_id: 5}, {_id: 6}, {_id: 7}], expectedStages: {"IXSCAN": 1, "FETCH": 0, "PROJECTION_COVERED": 1}, }); validateSimpleCountCmdOutputAndPlan({ filter: {$or: [{a: null}, {a: {$regex: "^$"}}]}, expectedCount: 3, expectedStages: {"IXSCAN": 1, "FETCH": 0, "COUNT": 1}, }); validateGroupCountAggCmdOutputAndPlan({ filter: {$or: [{a: null}, {a: {$regex: "^$"}}]}, expectedCount: 3, expectedStages: {"IXSCAN": 1, "FETCH": 0}, }); // Now test case with a multikey index. We can't leverage the optimization here. assert.commandWorked(coll.insert({_id: 8, a: [1, 2, 3]})); assert.commandWorked(coll.insert({_id: 9, a: []})); validateFindCmdOutputAndPlan({ filter: {$or: [{a: null}, {a: []}, {a: {$regex: "^$"}}]}, projection: {_id: 1}, expectedOutput: [{_id: 5}, {_id: 6}, {_id: 7}, {_id: 9}], expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); validateSimpleCountCmdOutputAndPlan({ filter: {$or: [{a: null}, {a: []}, {a: {$regex: "^$"}}]}, expectedCount: 4, expectedStages: {"COUNT": 1, "IXSCAN": 1, "FETCH": 1}, }); validateGroupCountAggCmdOutputAndPlan({ filter: {$or: [{a: null}, {a: []}, {a: {$regex: "^$"}}]}, expectedCount: 4, expectedStages: {"IXSCAN": 1, "FETCH": 1}, }); // We also shouldn't cover queries on multikey indexes where $in includes an array, as we will still // need a filter after the IXSCAN to correctly return validateFindCmdOutputAndPlan({ filter: {$or: [{a: null}, {a: []}, {a: [2]}]}, projection: {_id: 1}, expectedOutput: [{_id: 6}, {_id: 7}, {_id: 9}], expectedStages: {"IXSCAN": 1, "FETCH": 1}, }); validateSimpleCountCmdOutputAndPlan({ filter: {$or: [{a: null}, {a: []}, {a: [2]}]}, expectedCount: 3, expectedStages: {"IXSCAN": 1, "FETCH": 1}, }); validateGroupCountAggCmdOutputAndPlan({ filter: {$or: [{a: null}, {a: []}, {a: [2]}]}, expectedCount: 3, expectedStages: {"IXSCAN": 1, "FETCH": 1}, }); // Validate that when we have a dotted path, we return the correct results for null queries. coll.drop(); assert.commandWorked(coll.insertMany([ {_id: 1, a: 1}, {_id: 2, a: null}, {_id: 3}, {_id: 4, a: {b: 1}}, {_id: 5, a: {b: null}}, {_id: 6, a: {c: 1}}, ])); assert.commandWorked(coll.createIndex({"a.b": 1, _id: 1})); validateFindCmdOutputAndPlan({ filter: {"a.b": null}, projection: {_id: 1}, expectedOutput: [{_id: 1}, {_id: 2}, {_id: 3}, {_id: 5}, {_id: 6}], expectedStages: {"IXSCAN": 1, "PROJECTION_COVERED": 1, "FETCH": 0}, }); validateSimpleCountCmdOutputAndPlan({ filter: {"a.b": null}, expectedCount: 5, expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0}, }); validateGroupCountAggCmdOutputAndPlan({ filter: {"a.b": null}, expectedCount: 5, expectedStages: {"OR": 1, "COUNT_SCAN": 2, "IXSCAN": 0, "FETCH": 0}, }); validateFindCmdOutputAndPlan({ filter: {a: {b: null}}, projection: {_id: 1}, expectedOutput: [{_id: 5}], expectedStages: {"COLLSCAN": 1, "PROJECTION_SIMPLE": 1}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: {b: null}}, expectedCount: 1, expectedStages: {"COLLSCAN": 1}, }); validateGroupCountAggCmdOutputAndPlan({ filter: {a: {b: null}}, expectedCount: 1, expectedStages: {"COLLSCAN": 1}, }); // Still need fetch if we don't have a sufficiently restrictive projection. validateFindCmdOutputAndPlan({ filter: {"a.b": null}, projection: {_id: 1, a: 1}, expectedOutput: [ {_id: 1, a: 1}, {_id: 2, a: null}, {_id: 3}, {_id: 5, a: {b: null}}, {_id: 6, a: {c: 1}}, ], expectedStages: {"IXSCAN": 1, "FETCH": 1}, }); // Make index multikey, and test case where field b is nested in an array. assert.commandWorked(coll.insertMany([ {_id: 7, a: [{b: null}]}, {_id: 8, a: [{b: []}]}, {_id: 9, a: [{b: [1, 2, 3]}]}, {_id: 10, a: [{b: 123}]}, {_id: 11, a: [{c: 123}]}, {_id: 12, a: []}, {_id: 13, a: [{}]}, {_id: 14, a: [1, 2, 3]}, {_id: 15, a: [{b: 1}, {c: 2}, {b: 3}]}, {_id: 16, a: [null]}, ])); validateFindCmdOutputAndPlan({ filter: {"a.b": null}, projection: {_id: 1}, expectedOutput: [ {_id: 1}, {_id: 2}, {_id: 3}, {_id: 5}, {_id: 6}, {_id: 7}, {_id: 11}, {_id: 13}, {_id: 15} ], expectedStages: {"IXSCAN": 1, "PROJECTION_SIMPLE": 1, "FETCH": 1}, }); validateSimpleCountCmdOutputAndPlan({ filter: {"a.b": null}, expectedCount: 9, expectedStages: {"COUNT": 1, "IXSCAN": 1, "FETCH": 1}, }); validateGroupCountAggCmdOutputAndPlan({ filter: {"a.b": null}, expectedCount: 9, expectedStages: {"IXSCAN": 1, "FETCH": 1}, }); validateFindCmdOutputAndPlan({ filter: {a: {b: null}}, projection: {_id: 1}, expectedOutput: [{_id: 5}, {_id: 7}], expectedStages: { "COLLSCAN": 1, "PROJECTION_SIMPLE": 1, }, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: {b: null}}, expectedCount: 2, expectedStages: {"COLLSCAN": 1}, }); validateGroupCountAggCmdOutputAndPlan({ filter: {a: {b: null}}, expectedCount: 2, expectedStages: {"COLLSCAN": 1}, }); validateFindCmdOutputAndPlan({ filter: {a: [{b: null}]}, projection: {_id: 1}, expectedOutput: [{_id: 7}], expectedStages: {"COLLSCAN": 1, "PROJECTION_SIMPLE": 1}, }); validateSimpleCountCmdOutputAndPlan({ filter: {a: [{b: null}]}, expectedCount: 1, expectedStages: {"COLLSCAN": 1}, }); validateGroupCountAggCmdOutputAndPlan({ filter: {a: [{b: null}]}, expectedCount: 1, expectedStages: {"COLLSCAN": 1}, }); // We still need a FETCH for composite paths, because both {a: [1,2,3]} and {"a.b": null} generate // null index keys, but the former should not match the predicate below. validateFindCmdOutputAndPlan({ filter: {"a.b": {$in: [null, []]}}, projection: {_id: 1}, expectedOutput: [ {_id: 1}, {_id: 2}, {_id: 3}, {_id: 5}, {_id: 6}, {_id: 7}, {_id: 8}, {_id: 11}, {_id: 13}, {_id: 15} ], expectedStages: {"IXSCAN": 1, "PROJECTION_SIMPLE": 1, "FETCH": 1}, }); validateSimpleCountCmdOutputAndPlan({ filter: {"a.b": {$in: [null, []]}}, expectedCount: 10, expectedStages: {"IXSCAN": 1, "FETCH": 1}, }); validateGroupCountAggCmdOutputAndPlan({ filter: {"a.b": {$in: [null, []]}}, expectedCount: 10, expectedStages: {"IXSCAN": 1, "FETCH": 1}, }); })();