summaryrefslogtreecommitdiff
path: root/jstests/core/query/distinct/distinct_multikey_dotted_path.js
diff options
context:
space:
mode:
Diffstat (limited to 'jstests/core/query/distinct/distinct_multikey_dotted_path.js')
-rw-r--r--jstests/core/query/distinct/distinct_multikey_dotted_path.js209
1 files changed, 209 insertions, 0 deletions
diff --git a/jstests/core/query/distinct/distinct_multikey_dotted_path.js b/jstests/core/query/distinct/distinct_multikey_dotted_path.js
new file mode 100644
index 00000000000..c8530fe6799
--- /dev/null
+++ b/jstests/core/query/distinct/distinct_multikey_dotted_path.js
@@ -0,0 +1,209 @@
+/**
+ * Test distinct() on multikey indexes using a dotted path.
+ *
+ * Assumes the collection is not sharded, because sharding the collection could result in different
+ * plans being chosen on different shards (for example, if an index is multikey on one shard but
+ * not another).
+ * Doesn't support stepdowns because it runs explain() on an aggregation (which can apparently
+ * return incomplete results).
+ * @tags: [
+ * assumes_unsharded_collection,
+ * does_not_support_stepdowns,
+ * ]
+ */
+(function() {
+"use strict";
+load("jstests/libs/analyze_plan.js"); // For planHasStage().
+
+const coll = db.distinct_multikey;
+coll.drop(); // Drop any existing collection before the inserts below.
+assert.commandWorked(coll.createIndex({"a.b.c": 1})); // No array values are inserted yet.
+
+assert.commandWorked(coll.insert({a: {b: {c: 1}}}));
+assert.commandWorked(coll.insert({a: {b: {c: 2}}}));
+assert.commandWorked(coll.insert({a: {b: {c: 3}}}));
+assert.commandWorked(coll.insert({a: {b: {notRelevant: 3}}})); // 'a.b' exists but has no 'c'.
+assert.commandWorked(coll.insert({a: {notRelevant: 3}})); // 'a' exists but has no 'b'.
+
+const numPredicate = { // Matches every document whose "a.b.c" is greater than 0.
+ "a.b.c": {$gt: 0}
+};
+
+function getAggPipelineForDistinct(path) {
+ return [{$group: {_id: `$${path}`}}]; // One $group stage keyed on the field path 'path'.
+}
+
+// Executes 'pipeline' (which contains a $group) against the test collection and returns the
+// '_id' of every output document, which is the shape of result a distinct() would give.
+// Caveat: $group uses an array value as a single key instead of unwinding it, so on a
+// multikey field its output differs from that of distinct(). For that reason this helper is
+// only used for non-multikey fields.
+function distinctResultsFromPipeline(pipeline) {
+ const groupDocs = coll.aggregate(pipeline).toArray();
+ return groupDocs.map(({_id}) => _id);
+}
+
+// Be sure a distinct scan is used when the index is not multikey.
+(function testDistinctWithNonMultikeyIndex() {
+ const results = coll.distinct("a.b.c");
+ // TODO SERVER-14832: Returning 'null' here is inconsistent with the behavior when no index
+ // is present.
+ assert.sameMembers([1, 2, 3, null], results);
+
+ const expl = coll.explain().distinct("a.b.c");
+ assert.eq(true, planHasStage(db, getWinningPlan(expl.queryPlanner), "DISTINCT_SCAN"), expl);
+
+ // Do an equivalent query using $group; 'aggExpl' is the message so a failure shows the plan.
+ const pipeline = getAggPipelineForDistinct("a.b.c");
+ const aggResults = distinctResultsFromPipeline(pipeline);
+ assert.sameMembers(aggResults, results);
+ const aggExpl = assert.commandWorked(coll.explain().aggregate(pipeline));
+ assert.gt(getAggPlanStages(aggExpl, "DISTINCT_SCAN").length, 0, aggExpl);
+})();
+
+// Distinct with a predicate, run before any array values are inserted.
+(function testDistinctWithPredWithNonMultikeyIndex() {
+ const results = coll.distinct("a.b.c", numPredicate);
+ assert.sameMembers([1, 2, 3], results);
+
+ const expl = coll.explain().distinct("a.b.c", numPredicate);
+
+ assert.eq(true, planHasStage(db, getWinningPlan(expl.queryPlanner), "DISTINCT_SCAN"), expl);
+
+ const pipeline = [{$match: numPredicate}].concat(getAggPipelineForDistinct("a.b.c"));
+ const aggResults = distinctResultsFromPipeline(pipeline);
+ assert.sameMembers(aggResults, results);
+ const aggExpl = assert.commandWorked(coll.explain().aggregate(pipeline));
+ assert.gt(getAggPlanStages(aggExpl, "DISTINCT_SCAN").length, 0, aggExpl); // Show plan on failure.
+})();
+
+// Make the index multikey: 'a.b' is an array in the next two documents, which both hold c: 4.
+assert.commandWorked(coll.insert({a: {b: [{c: 4}, {c: 5}]}}));
+assert.commandWorked(coll.insert({a: {b: [{c: 4}, {c: 6}]}}));
+// Empty array is indexed as 'undefined'.
+assert.commandWorked(coll.insert({a: {b: {c: []}}}));
+
+// We should still use the index as long as the path we distinct() on is never an array
+// index.
+(function testDistinctWithMultikeyIndex() {
+ const multiKeyResults = coll.distinct("a.b.c");
+ // TODO SERVER-14832: Returning 'null' and 'undefined' here is inconsistent with the
+ // behavior when no index is present.
+ assert.sameMembers([1, 2, 3, 4, 5, 6, null, undefined], multiKeyResults);
+ const expl = coll.explain().distinct("a.b.c");
+ // Pass 'expl' as the assertion message so a failure shows the plan that was chosen.
+ assert.eq(true, planHasStage(db, getWinningPlan(expl.queryPlanner), "DISTINCT_SCAN"), expl);
+
+ // Not running same query with $group now that the field is multikey. See comment above.
+})();
+
+// A DISTINCT_SCAN cannot be used when the indexed key contains a multikey path and the
+// distinct() also carries a predicate. Consider the predicate {a: 4} with the two documents
+// {a: [4, 5]} and {a: [4, 6]}: a DISTINCT_SCAN would "skip over" one of them and so omit
+// either '5' or '6', whereas the correct answer is [4, 5, 6]. This test covers a similar
+// case using the dotted path "a.b.c".
+(function testDistinctWithPredWithMultikeyIndex() {
+ const equalityFilter = {"a.b.c": 4};
+ const distinctValues = coll.distinct("a.b.c", equalityFilter);
+ assert.sameMembers([4, 5, 6], distinctValues);
+
+ const explainOutput = coll.explain().distinct("a.b.c", equalityFilter);
+ const chosenPlan = getWinningPlan(explainOutput.queryPlanner);
+ assert.eq(false, planHasStage(db, chosenPlan, "DISTINCT_SCAN"), explainOutput);
+ assert.eq(true, planHasStage(db, chosenPlan, "IXSCAN"), explainOutput);
+
+ // The $group equivalent is skipped: $group does not unwind the now-multikey field.
+})();
+
+// Perform a distinct on a path where the last component is multikey.
+(function testDistinctOnPathWhereLastComponentIsMultiKey() {
+ assert.commandWorked(coll.createIndex({"a.b": 1}));
+ const multiKeyResults = coll.distinct("a.b");
+ assert.sameMembers(
+ [
+ null, // From the document with no 'b' field. TODO SERVER-14832: this is
+ // inconsistent with behavior when no index is present.
+ {c: 1},
+ {c: 2},
+ {c: 3},
+ {c: 4},
+ {c: 5},
+ {c: 6},
+ {c: []},
+ {notRelevant: 3}
+ ],
+ multiKeyResults);
+ // Pass 'expl' as the assertion message so a failure shows the plan that was chosen.
+ const expl = coll.explain().distinct("a.b");
+ assert.eq(true, planHasStage(db, getWinningPlan(expl.queryPlanner), "DISTINCT_SCAN"), expl);
+
+ // Not running same query with $group now that the field is multikey. See comment above.
+})();
+
+(function testDistinctOnPathWhereLastComponentIsMultiKeyWithPredicate() {
+ assert.commandWorked(coll.createIndex({"a.b": 1})); // Same index spec as the previous test.
+ const pred = {"a.b": {$type: "array"}}; // Keep only documents whose 'a.b' is an array.
+ const multiKeyResults = coll.distinct("a.b", pred);
+ assert.sameMembers(
+ [
+ {c: 4},
+ {c: 5},
+ {c: 6},
+ ],
+ multiKeyResults);
+ // Predicate on a multikey path: expect IXSCAN, not DISTINCT_SCAN. 'expl' aids debugging.
+ const expl = coll.explain().distinct("a.b", pred);
+ const winningPlan = getWinningPlan(expl.queryPlanner);
+ assert.eq(false, planHasStage(db, winningPlan, "DISTINCT_SCAN"), expl);
+ assert.eq(true, planHasStage(db, winningPlan, "IXSCAN"), expl);
+
+ // Not running same query with $group now that the field is multikey. See comment above.
+})();
+
+// A distinct() whose path contains an array index ("a.b.0") is expected to use a COLLSCAN,
+// even though an index exists on the prefix leading to the array component ("a.b").
+(function testDistinctOnNumericMultikeyPathNoIndex() {
+ const distinctValues = coll.distinct("a.b.0");
+ assert.eq(distinctValues, [{c: 4}]);
+
+ const explainOutput = coll.explain().distinct("a.b.0");
+ const chosenPlan = getWinningPlan(explainOutput.queryPlanner);
+ assert.eq(true, planHasStage(db, chosenPlan, "COLLSCAN"), explainOutput);
+
+ // No $group equivalent: $group by "a.b.0" treats '0' as a field name, not an array index.
+})();
+
+// Once "a.b.0" itself is indexed, a distinct() on it should be able to use DISTINCT_SCAN.
+(function testDistinctOnNumericMultikeyPathWithIndex() {
+ assert.commandWorked(coll.createIndex({"a.b.0": 1}));
+ assert.commandWorked(coll.insert({a: {b: {0: "hello world"}}})); // '0' is a field name here.
+ const distinctValues = coll.distinct("a.b.0");
+ assert.sameMembers(distinctValues, [{c: 4}, "hello world"]);
+
+ const explainOutput = coll.explain().distinct("a.b.0");
+ const chosenPlan = getWinningPlan(explainOutput.queryPlanner);
+ assert.eq(true, planHasStage(db, chosenPlan, "DISTINCT_SCAN"), explainOutput);
+
+ // No $group equivalent: $group by "a.b.0" treats '0' as a field name, not an array index.
+})();
+
+// Once "a" holds an array, a distinct() on "a.b.0" with a predicate should use an IXSCAN over
+// the "a.b.0" index (created by the previous test) instead of a DISTINCT_SCAN. Why a
+// DISTINCT_SCAN is not allowed on a multikey path with a predicate is explained earlier.
+(function testDistinctWithPredOnNumericMultikeyPathWithIndex() {
+ const objectFilter = {"a.b.0": {$type: "object"}};
+ const distinctValues = coll.distinct("a.b.0", objectFilter);
+ assert.sameMembers(distinctValues, [{c: 4}]);
+
+ // Turn "a" into a multikey component so that a DISTINCT_SCAN on "a.b.0" is no longer legal.
+ assert.commandWorked(coll.insert({a: [1, 2, 3]}));
+
+ const explainOutput = coll.explain().distinct("a.b.0", objectFilter);
+ const chosenPlan = getWinningPlan(explainOutput.queryPlanner);
+ assert.eq(false, planHasStage(db, chosenPlan, "DISTINCT_SCAN"), explainOutput);
+ assert.eq(true, planHasStage(db, chosenPlan, "IXSCAN"), explainOutput);
+
+ // The aggregation equivalent is not attempted, because a $group by "a.b.0" would interpret
+ // '0' purely as a field name and never as an array index.
+})();
+})();