summaryrefslogtreecommitdiff
path: root/jstests/core/distinct_multikey_dotted_path.js
blob: 7b2bee44545c26431ac8255354ac0f620cfe1a61 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
/**
 * Test distinct() on multikey indexes using a dotted path.
 *
 * Assumes the collection is not sharded, because sharding the collection could result in different
 * plans being chosen on different shards (for example, if an index is multikey on one shard but
 * not another).
 * Doesn't support stepdowns because it runs explain() on an aggregation (which can apparently
 * return incomplete results).
 * @tags: [assumes_unsharded_collection, does_not_support_stepdowns]
 */
(function() {
    "use strict";
    load("jstests/libs/analyze_plan.js");  // For planHasStage() and getAggPlanStages().

    // Start from an empty collection that has an index on the dotted path under test.
    const coll = db.distinct_multikey;
    coll.drop();
    assert.commandWorked(coll.createIndex({"a.b.c": 1}));

    // Seed documents one at a time. None of them contains an array, and the last two do not
    // contain the "a.b.c" path at all.
    for (const doc of [
             {a: {b: {c: 1}}},
             {a: {b: {c: 2}}},
             {a: {b: {c: 3}}},
             {a: {b: {notRelevant: 3}}},
             {a: {notRelevant: 3}},
         ]) {
        assert.commandWorked(coll.insert(doc));
    }

    // Predicate shared by the test cases below: "a.b.c" greater than zero.
    const numPredicate = {"a.b.c": {$gt: 0}};

    /**
     * Builds a one-stage aggregation pipeline that groups on the field path 'path', i.e. a
     * single $group whose _id is the expression "$<path>".
     */
    function getAggPipelineForDistinct(path) {
        const groupStage = {$group: {_id: "$" + path}};
        return [groupStage];
    }

    /**
     * Runs 'pipeline' against the test collection and returns only the _id of each result
     * document, which gives the output the same shape as the array returned by distinct().
     */
    function distinctResultsFromPipeline(pipeline) {
        return coll.aggregate(pipeline).toArray().map(({_id}) => _id);
    }

    // While the index is not multikey, distinct() on the indexed path should be answered with a
    // DISTINCT_SCAN, and so should the equivalent $group aggregation.
    (function testDistinctWithNonMultikeyIndex() {
        const distinctPath = "a.b.c";

        const distinctValues = coll.distinct(distinctPath);
        // TODO SERVER-14832: Returning 'null' here is inconsistent with the behavior when no index
        // is present.
        assert.sameMembers([1, 2, 3, null], distinctValues);

        const distinctExplain = coll.explain().distinct(distinctPath);
        const usesDistinctScan =
            planHasStage(db, distinctExplain.queryPlanner.winningPlan, "DISTINCT_SCAN");
        assert.eq(true, usesDistinctScan, distinctExplain);

        // The $group form must return the same values and also contain a DISTINCT_SCAN stage.
        const groupPipeline = getAggPipelineForDistinct(distinctPath);
        const groupValues = distinctResultsFromPipeline(groupPipeline);
        assert.sameMembers(groupValues, distinctValues);
        const groupExplain = assert.commandWorked(coll.explain().aggregate(groupPipeline));
        const distinctScanStages = getAggPlanStages(groupExplain, "DISTINCT_SCAN");
        assert.gt(distinctScanStages.length, 0);
    })();

    // distinct() with a predicate on the indexed path, while the index is still not multikey,
    // should also be answered with a DISTINCT_SCAN.
    (function testDistinctWithPredWithNonMultikeyIndex() {
        const distinctPath = "a.b.c";

        const matchingValues = coll.distinct(distinctPath, numPredicate);
        assert.sameMembers([1, 2, 3], matchingValues);

        const explainOutput = coll.explain().distinct(distinctPath, numPredicate);
        const usesDistinctScan =
            planHasStage(db, explainOutput.queryPlanner.winningPlan, "DISTINCT_SCAN");
        assert.eq(true, usesDistinctScan, explainOutput);

        // TODO: SERVER-40465 not checking the results for an equivalent aggregation, since results
        // from the aggregation with a predicate (a $match preceding the $group) may not match the
        // results from distinct() with a predicate.
    })();

    // Insert documents containing arrays along the indexed path, which makes the index multikey.
    for (const doc of [
             {a: {b: [{c: 4}, {c: 5}]}},
             {a: {b: [{c: 4}, {c: 6}]}},
             // Empty array is indexed as 'undefined'.
             {a: {b: {c: []}}},
         ]) {
        assert.commandWorked(coll.insert(doc));
    }

    // We should still use the index as long as the path we distinct() on is never an array
    // index.
    (function testDistinctWithMultikeyIndex() {
        const multiKeyResults = coll.distinct("a.b.c");
        // TODO SERVER-14832: Returning 'null' and 'undefined' here is inconsistent with the
        // behavior when no index is present.
        assert.sameMembers([1, 2, 3, 4, 5, 6, null, undefined], multiKeyResults);

        // The explain output is passed as the assertion message so that a failure reports the
        // plan that was actually chosen.
        const expl = coll.explain().distinct("a.b.c");
        assert.eq(true, planHasStage(db, expl.queryPlanner.winningPlan, "DISTINCT_SCAN"), expl);

        // Do the same thing with aggregation.
        const pipeline = getAggPipelineForDistinct("a.b.c");
        const aggResults = distinctResultsFromPipeline(pipeline);
        assert.sameMembers(aggResults, multiKeyResults);
        const aggExpl = assert.commandWorked(coll.explain().aggregate(pipeline));
        assert.gt(getAggPlanStages(aggExpl, "DISTINCT_SCAN").length, 0, aggExpl);
    })();

    // The DISTINCT_SCAN optimization cannot be used when the key contains a multikey path and a
    // predicate is given. Consider a predicate like {a: 4} and the two documents {a: [4, 5]} and
    // {a: [4, 6]}: a DISTINCT_SCAN would "skip over" one of the documents and so omit either '5'
    // or '6', instead of producing the correct result [4, 5, 6]. The test below covers a similar
    // case.
    (function testDistinctWithPredWithMultikeyIndex() {
        const distinctPath = "a.b.c";
        const equalsFour = {"a.b.c": 4};

        const matchingValues = coll.distinct(distinctPath, equalsFour);
        assert.sameMembers([4, 5, 6], matchingValues);

        const explainOutput = coll.explain().distinct(distinctPath, equalsFour);
        const winningPlan = explainOutput.queryPlanner.winningPlan;
        assert.eq(false, planHasStage(db, winningPlan, "DISTINCT_SCAN"), explainOutput);
        assert.eq(true, planHasStage(db, winningPlan, "IXSCAN"), explainOutput);

        // TODO: SERVER-40465 not checking the results for an equivalent aggregation, since results
        // from the aggregation with a predicate (a $match preceding the $group) may not match the
        // results from distinct() with a predicate.
    })();

    // Perform a distinct on a path where the last component is multikey.
    (function testDistinctOnPathWhereLastComponentIsMultiKey() {
        assert.commandWorked(coll.createIndex({"a.b": 1}));
        const multiKeyResults = coll.distinct("a.b");
        assert.sameMembers(
            [
              null,  // From the document with no 'b' field. TODO SERVER-14832: this is
                     // inconsistent with behavior when no index is present.
              {c: 1},
              {c: 2},
              {c: 3},
              {c: 4},
              {c: 5},
              {c: 6},
              {c: []},
              {notRelevant: 3}
            ],
            multiKeyResults);

        // The explain output is passed as the assertion message so that a failure reports the
        // plan that was actually chosen.
        const expl = coll.explain().distinct("a.b");
        assert.eq(true, planHasStage(db, expl.queryPlanner.winningPlan, "DISTINCT_SCAN"), expl);

        // Do the same thing with aggregation.
        const pipeline = getAggPipelineForDistinct("a.b");
        const aggResults = distinctResultsFromPipeline(pipeline);
        assert.sameMembers(aggResults, multiKeyResults);
        const aggExpl = assert.commandWorked(coll.explain().aggregate(pipeline));
        assert.gt(getAggPlanStages(aggExpl, "DISTINCT_SCAN").length, 0, aggExpl);
    })();

    // Perform a distinct with a predicate on a path where the last component is multikey. The
    // assertions below expect an IXSCAN rather than a DISTINCT_SCAN.
    (function testDistinctOnPathWhereLastComponentIsMultiKeyWithPredicate() {
        assert.commandWorked(coll.createIndex({"a.b": 1}));
        const pred = {"a.b": {$type: "array"}};
        const multiKeyResults = coll.distinct("a.b", pred);
        assert.sameMembers(
            [
              {c: 4},
              {c: 5},
              {c: 6},
            ],
            multiKeyResults);

        // The explain output is passed as the assertion message so that a failure reports the
        // plan that was actually chosen.
        const expl = coll.explain().distinct("a.b", pred);
        assert.eq(false, planHasStage(db, expl.queryPlanner.winningPlan, "DISTINCT_SCAN"), expl);
        assert.eq(true, planHasStage(db, expl.queryPlanner.winningPlan, "IXSCAN"), expl);

        // TODO: SERVER-40465 not checking the results for an equivalent aggregation, since results
        // from the aggregation with a predicate (a $match preceding the $group) may not match the
        // results from distinct() with a predicate.
    })();

    // When the distinct() path contains an array index, a COLLSCAN is expected, even though an
    // index exists on the prefix leading to the array component ("a.b" in this case).
    (function testDistinctOnNumericMultikeyPathNoIndex() {
        const numericPath = "a.b.0";

        const firstElements = coll.distinct(numericPath);
        assert.eq(firstElements, [{c: 4}]);

        const explainOutput = coll.explain().distinct(numericPath);
        const usesCollScan = planHasStage(db, explainOutput.queryPlanner.winningPlan, "COLLSCAN");
        assert.eq(true, usesCollScan, explainOutput);

        // The equivalent aggregation is not attempted: $group by "a.b.0" only treats '0' as a
        // field name (not as an array index).
    })();

    // Once an index on "a.b.0" exists, a distinct() on that path should be able to use a
    // DISTINCT_SCAN.
    (function testDistinctOnNumericMultikeyPathWithIndex() {
        const numericPath = "a.b.0";

        assert.commandWorked(coll.createIndex({"a.b.0": 1}));
        // Add a document in which '0' is a field name of the sub-document "a.b".
        assert.commandWorked(coll.insert({a: {b: {0: "hello world"}}}));

        const distinctValues = coll.distinct(numericPath);
        assert.sameMembers(distinctValues, [{c: 4}, "hello world"]);

        const explainOutput = coll.explain().distinct(numericPath);
        const usesDistinctScan =
            planHasStage(db, explainOutput.queryPlanner.winningPlan, "DISTINCT_SCAN");
        assert.eq(true, usesDistinctScan, explainOutput);

        // The equivalent aggregation is not attempted: $group by "a.b.0" only treats '0' as a
        // field name (not as an array index).
    })();

    // With an index on "a.b.0", a distinct() on that path *with a predicate* should use an
    // IXSCAN instead of a DISTINCT_SCAN, as "a.b" is multikey. This is the same restriction as
    // for any other multikey path combined with a predicate.
    (function testDistinctWithPredOnNumericMultikeyPathWithIndex() {
        const numericPath = "a.b.0";
        const isObject = {"a.b.0": {$type: "object"}};

        const matchingValues = coll.distinct(numericPath, isObject);
        assert.sameMembers(matchingValues, [{c: 4}]);

        const explainOutput = coll.explain().distinct(numericPath, isObject);
        const winningPlan = explainOutput.queryPlanner.winningPlan;
        assert.eq(false, planHasStage(db, winningPlan, "DISTINCT_SCAN"), explainOutput);
        assert.eq(true, planHasStage(db, winningPlan, "IXSCAN"), explainOutput);

        // The equivalent aggregation is not attempted: $group by "a.b.0" only treats '0' as a
        // field name (not as an array index).
    })();
})();