diff options
Diffstat (limited to 'jstests/core/query/distinct/distinct_multikey_dotted_path.js')
-rw-r--r-- | jstests/core/query/distinct/distinct_multikey_dotted_path.js | 209 |
1 files changed, 209 insertions, 0 deletions
diff --git a/jstests/core/query/distinct/distinct_multikey_dotted_path.js b/jstests/core/query/distinct/distinct_multikey_dotted_path.js new file mode 100644 index 00000000000..c8530fe6799 --- /dev/null +++ b/jstests/core/query/distinct/distinct_multikey_dotted_path.js @@ -0,0 +1,209 @@ +/** + * Test distinct() on multikey indexes using a dotted path. + * + * Assumes the collection is not sharded, because sharding the collection could result in different + * plans being chosen on different shards (for example, if an index is multikey on one shard but + * not another). + * Doesn't support stepdowns because it runs explain() on an aggregation (which can apparently + * return incomplete results). + * @tags: [ + * assumes_unsharded_collection, + * does_not_support_stepdowns, + * ] + */ +(function() { +"use strict"; +load("jstests/libs/analyze_plan.js"); // For planHasStage(). + +const coll = db.distinct_multikey; +coll.drop(); +assert.commandWorked(coll.createIndex({"a.b.c": 1})); + +assert.commandWorked(coll.insert({a: {b: {c: 1}}})); +assert.commandWorked(coll.insert({a: {b: {c: 2}}})); +assert.commandWorked(coll.insert({a: {b: {c: 3}}})); +assert.commandWorked(coll.insert({a: {b: {notRelevant: 3}}})); +assert.commandWorked(coll.insert({a: {notRelevant: 3}})); + +const numPredicate = { + "a.b.c": {$gt: 0} +}; + +function getAggPipelineForDistinct(path) { + return [{$group: {_id: "$" + path}}]; +} + +// Run an agg pipeline with a $group, and convert the results so they're equivalent +// to what a distinct() would return. +// Note that $group will treat an array as its own key rather than unwinding it. This means +// that a $group on a field that's multikey will have different behavior than a distinct(), so +// we only use this function for non-multikey fields. +function distinctResultsFromPipeline(pipeline) { + const res = coll.aggregate(pipeline).toArray(); + return res.map((x) => x._id); +} + +// Be sure a distinct scan is used when the index is not multi key. +(function testDistinctWithNonMultikeyIndex() { + const results = coll.distinct("a.b.c"); + // TODO SERVER-14832: Returning 'null' here is inconsistent with the behavior when no index + // is present. + assert.sameMembers([1, 2, 3, null], results); + + const expl = coll.explain().distinct("a.b.c"); + assert.eq(true, planHasStage(db, getWinningPlan(expl.queryPlanner), "DISTINCT_SCAN"), expl); + + // Do an equivalent query using $group. + const pipeline = getAggPipelineForDistinct("a.b.c"); + const aggResults = distinctResultsFromPipeline(pipeline); + assert.sameMembers(aggResults, results); + const aggExpl = assert.commandWorked(coll.explain().aggregate(pipeline)); + assert.gt(getAggPlanStages(aggExpl, "DISTINCT_SCAN").length, 0); +})(); + +// Distinct with a predicate. +(function testDistinctWithPredWithNonMultikeyIndex() { + const results = coll.distinct("a.b.c", numPredicate); + assert.sameMembers([1, 2, 3], results); + + const expl = coll.explain().distinct("a.b.c", numPredicate); + + assert.eq(true, planHasStage(db, getWinningPlan(expl.queryPlanner), "DISTINCT_SCAN"), expl); + + const pipeline = [{$match: numPredicate}].concat(getAggPipelineForDistinct("a.b.c")); + const aggResults = distinctResultsFromPipeline(pipeline); + assert.sameMembers(aggResults, results); + const aggExpl = assert.commandWorked(coll.explain().aggregate(pipeline)); + assert.gt(getAggPlanStages(aggExpl, "DISTINCT_SCAN").length, 0); +})(); + +// Make the index multi key. +assert.commandWorked(coll.insert({a: {b: [{c: 4}, {c: 5}]}})); +assert.commandWorked(coll.insert({a: {b: [{c: 4}, {c: 6}]}})); +// Empty array is indexed as 'undefined'. +assert.commandWorked(coll.insert({a: {b: {c: []}}})); + +// We should still use the index as long as the path we distinct() on is never an array +// index. +(function testDistinctWithMultikeyIndex() { + const multiKeyResults = coll.distinct("a.b.c"); + // TODO SERVER-14832: Returning 'null' and 'undefined' here is inconsistent with the + // behavior when no index is present. + assert.sameMembers([1, 2, 3, 4, 5, 6, null, undefined], multiKeyResults); + const expl = coll.explain().distinct("a.b.c"); + + assert.eq(true, planHasStage(db, getWinningPlan(expl.queryPlanner), "DISTINCT_SCAN")); + + // Not running same query with $group now that the field is multikey. See comment above. +})(); + +// We cannot use the DISTINCT_SCAN optimization when there is a multikey path in the key and +// there is a predicate. The reason is that we may have a predicate like {a: 4}, and two +// documents: {a: [4, 5]}, {a: [4, 6]}. With a DISTINCT_SCAN, we would "skip over" one of the +// documents, and leave out either '5' or '6', rather than providing the correct result of +// [4, 5, 6]. The test below is for a similar case. +(function testDistinctWithPredWithMultikeyIndex() { + const pred = {"a.b.c": 4}; + const results = coll.distinct("a.b.c", pred); + assert.sameMembers([4, 5, 6], results); + + const expl = coll.explain().distinct("a.b.c", pred); + const winningPlan = getWinningPlan(expl.queryPlanner); + assert.eq(false, planHasStage(db, winningPlan, "DISTINCT_SCAN"), expl); + assert.eq(true, planHasStage(db, winningPlan, "IXSCAN"), expl); + + // Not running same query with $group now that the field is multikey. See comment above. +})(); + +// Perform a distinct on a path where the last component is multikey. +(function testDistinctOnPathWhereLastComponentIsMultiKey() { + assert.commandWorked(coll.createIndex({"a.b": 1})); + const multiKeyResults = coll.distinct("a.b"); + assert.sameMembers( + [ + null, // From the document with no 'b' field. TODO SERVER-14832: this is + // inconsistent with behavior when no index is present. + {c: 1}, + {c: 2}, + {c: 3}, + {c: 4}, + {c: 5}, + {c: 6}, + {c: []}, + {notRelevant: 3} + ], + multiKeyResults); + + const expl = coll.explain().distinct("a.b"); + assert.eq(true, planHasStage(db, getWinningPlan(expl.queryPlanner), "DISTINCT_SCAN")); + + // Not running same query with $group now that the field is multikey. See comment above. +})(); + +(function testDistinctOnPathWhereLastComponentIsMultiKeyWithPredicate() { + assert.commandWorked(coll.createIndex({"a.b": 1})); + const pred = {"a.b": {$type: "array"}}; + const multiKeyResults = coll.distinct("a.b", pred); + assert.sameMembers( + [ + {c: 4}, + {c: 5}, + {c: 6}, + ], + multiKeyResults); + + const expl = coll.explain().distinct("a.b", pred); + const winningPlan = getWinningPlan(expl.queryPlanner); + assert.eq(false, planHasStage(db, winningPlan, "DISTINCT_SCAN")); + assert.eq(true, planHasStage(db, winningPlan, "IXSCAN")); + + // Not running same query with $group now that the field is multikey. See comment above. +})(); + +// If the path we distinct() on includes an array index, a COLLSCAN should be used, +// even if an index is available on the prefix to the array component ("a.b" in this case). +(function testDistinctOnNumericMultikeyPathNoIndex() { + const res = coll.distinct("a.b.0"); + assert.eq(res, [{c: 4}]); + + const expl = coll.explain().distinct("a.b.0"); + assert.eq(true, planHasStage(db, getWinningPlan(expl.queryPlanner), "COLLSCAN"), expl); + + // Will not attempt the equivalent query with aggregation, since $group by "a.b.0" will + // only treat '0' as a field name (not array index). +})(); + +// Creating an index on "a.b.0" and doing a distinct on it should be able to use DISTINCT_SCAN. +(function testDistinctOnNumericMultikeyPathWithIndex() { + assert.commandWorked(coll.createIndex({"a.b.0": 1})); + assert.commandWorked(coll.insert({a: {b: {0: "hello world"}}})); + const res = coll.distinct("a.b.0"); + assert.sameMembers(res, [{c: 4}, "hello world"]); + + const expl = coll.explain().distinct("a.b.0"); + assert.eq(true, planHasStage(db, getWinningPlan(expl.queryPlanner), "DISTINCT_SCAN"), expl); + + // Will not attempt the equivalent query with aggregation, since $group by "a.b.0" will + // only treat '0' as a field name (not array index). +})(); + +// Inserting an array on "a", creating an index on "a.b.0", and doing a distinct on it should use an +// IXSCAN, as "a" is now multikey. See explanation above about why a DISTINCT_SCAN cannot be used +// when the path given is multikey. +(function testDistinctWithPredOnNumericMultikeyPathWithIndex() { + const pred = {"a.b.0": {$type: "object"}}; + const res = coll.distinct("a.b.0", pred); + assert.sameMembers(res, [{c: 4}]); + + // Make "a" multikey in order to ensure that a DISTINCT_SCAN plan on "a.b.0" is not legal. + assert.commandWorked(coll.insert({a: [1, 2, 3]})); + + const expl = coll.explain().distinct("a.b.0", pred); + const winningPlan = getWinningPlan(expl.queryPlanner); + assert.eq(false, planHasStage(db, winningPlan, "DISTINCT_SCAN"), expl); + assert.eq(true, planHasStage(db, winningPlan, "IXSCAN"), expl); + + // Will not attempt the equivalent query with aggregation, since $group by "a.b.0" will + // only treat '0' as a field name (not array index). +})(); +})(); |