summaryrefslogtreecommitdiff
path: root/jstests/core/distinct_multikey_dotted_path.js
blob: c8530fe679997086058ad929483690693a06d1c5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
/**
 * Test distinct() on multikey indexes using a dotted path.
 *
 * Assumes the collection is not sharded, because sharding the collection could result in different
 * plans being chosen on different shards (for example, if an index is multikey on one shard but
 * not another).
 * Doesn't support stepdowns because it runs explain() on an aggregation (which can apparently
 * return incomplete results).
 * @tags: [
 *   assumes_unsharded_collection,
 *   does_not_support_stepdowns,
 * ]
 */
(function() {
"use strict";
load("jstests/libs/analyze_plan.js");  // For planHasStage().

// Start from an empty collection whose only secondary index is on the dotted path under test.
const coll = db.distinct_multikey;
coll.drop();
assert.commandWorked(coll.createIndex({"a.b.c": 1}));

// Seed data: three documents with a scalar "a.b.c" and two documents that lack that path. None of
// them contains an array.
for (const seedDoc of [
         {a: {b: {c: 1}}},
         {a: {b: {c: 2}}},
         {a: {b: {c: 3}}},
         {a: {b: {notRelevant: 3}}},
         {a: {notRelevant: 3}},
     ]) {
    assert.commandWorked(coll.insert(seedDoc));
}

// Range predicate on the indexed path; selects documents whose "a.b.c" is greater than 0.
const numPredicate = {"a.b.c": {$gt: 0}};

// Returns a one-stage aggregation pipeline that groups by the field at 'path' (a dotted path
// given without the leading "$").
function getAggPipelineForDistinct(path) {
    const groupByPath = {$group: {_id: "$" + path}};
    return [groupByPath];
}

// Executes 'pipeline' (expected to contain a $group) against the test collection and returns only
// the '_id' of each result document, so the output has the same shape as a distinct() result.
// Caveat: $group keeps an array value as a single key instead of unwinding it the way distinct()
// does, so the two only agree on non-multikey fields. This helper is therefore only used for
// non-multikey fields.
function distinctResultsFromPipeline(pipeline) {
    return coll.aggregate(pipeline).toArray().map(({_id}) => _id);
}

// While the index is not multikey, distinct() must be answered with a DISTINCT_SCAN.
(function testDistinctWithNonMultikeyIndex() {
    const path = "a.b.c";

    const distinctValues = coll.distinct(path);
    // TODO SERVER-14832: Returning 'null' here is inconsistent with the behavior when no index
    // is present.
    assert.sameMembers([1, 2, 3, null], distinctValues);

    const distinctExplain = coll.explain().distinct(path);
    const distinctPlan = getWinningPlan(distinctExplain.queryPlanner);
    assert.eq(true, planHasStage(db, distinctPlan, "DISTINCT_SCAN"), distinctExplain);

    // The $group formulation must produce the same values and also contain a DISTINCT_SCAN.
    const groupPipeline = getAggPipelineForDistinct(path);
    assert.sameMembers(distinctResultsFromPipeline(groupPipeline), distinctValues);
    const groupExplain = assert.commandWorked(coll.explain().aggregate(groupPipeline));
    assert.gt(getAggPlanStages(groupExplain, "DISTINCT_SCAN").length, 0);
})();

// distinct() with a predicate on the indexed field is also expected to use a DISTINCT_SCAN while
// the index is not multikey.
(function testDistinctWithPredWithNonMultikeyIndex() {
    const path = "a.b.c";

    const distinctValues = coll.distinct(path, numPredicate);
    assert.sameMembers([1, 2, 3], distinctValues);

    const distinctExplain = coll.explain().distinct(path, numPredicate);
    const distinctPlan = getWinningPlan(distinctExplain.queryPlanner);
    assert.eq(true, planHasStage(db, distinctPlan, "DISTINCT_SCAN"), distinctExplain);

    // Equivalent aggregation: $match on the predicate, followed by the $group.
    const matchThenGroup = [{$match: numPredicate}, ...getAggPipelineForDistinct(path)];
    assert.sameMembers(distinctResultsFromPipeline(matchThenGroup), distinctValues);
    const groupExplain = assert.commandWorked(coll.explain().aggregate(matchThenGroup));
    assert.gt(getAggPlanStages(groupExplain, "DISTINCT_SCAN").length, 0);
})();

// Insert documents containing arrays along "a.b.c" so that the index becomes multikey.
for (const arrayDoc of [
         {a: {b: [{c: 4}, {c: 5}]}},
         {a: {b: [{c: 4}, {c: 6}]}},
         // Empty array is indexed as 'undefined'.
         {a: {b: {c: []}}},
     ]) {
    assert.commandWorked(coll.insert(arrayDoc));
}

// We should still use the index as long as the path we distinct() on never includes an array
// index.
(function testDistinctWithMultikeyIndex() {
    const multiKeyResults = coll.distinct("a.b.c");
    // TODO SERVER-14832: Returning 'null' and 'undefined' here is inconsistent with the
    // behavior when no index is present.
    assert.sameMembers([1, 2, 3, 4, 5, 6, null, undefined], multiKeyResults);

    const expl = coll.explain().distinct("a.b.c");
    // Pass the explain output as the assertion message so that a failure shows the chosen plan.
    assert.eq(true, planHasStage(db, getWinningPlan(expl.queryPlanner), "DISTINCT_SCAN"), expl);

    // Not running the same query with $group now that the field is multikey: $group treats an
    // array as a single key rather than unwinding it, so its results would differ from distinct().
})();

// A DISTINCT_SCAN is not usable when the distinct() key has a multikey path and a predicate is
// present. Example: with predicate {a: 4} and documents {a: [4, 5]} and {a: [4, 6]}, a
// DISTINCT_SCAN would "skip over" one of the documents and drop either '5' or '6' instead of
// returning the correct [4, 5, 6]. The test below covers a similar case on "a.b.c".
(function testDistinctWithPredWithMultikeyIndex() {
    const path = "a.b.c";
    const equalsFour = {"a.b.c": 4};

    const distinctValues = coll.distinct(path, equalsFour);
    assert.sameMembers([4, 5, 6], distinctValues);

    const explainOutput = coll.explain().distinct(path, equalsFour);
    const chosenPlan = getWinningPlan(explainOutput.queryPlanner);
    // The index is still used, but through a regular IXSCAN rather than a DISTINCT_SCAN.
    assert.eq(false, planHasStage(db, chosenPlan, "DISTINCT_SCAN"), explainOutput);
    assert.eq(true, planHasStage(db, chosenPlan, "IXSCAN"), explainOutput);

    // No $group comparison here: the field is multikey, and $group treats an array as a single
    // key rather than unwinding it.
})();

// Perform a distinct on a path where the last component is multikey.
(function testDistinctOnPathWhereLastComponentIsMultiKey() {
    assert.commandWorked(coll.createIndex({"a.b": 1}));
    const multiKeyResults = coll.distinct("a.b");
    assert.sameMembers(
        [
            null,  // From the document with no 'b' field. TODO SERVER-14832: this is
                   // inconsistent with behavior when no index is present.
            {c: 1},
            {c: 2},
            {c: 3},
            {c: 4},
            {c: 5},
            {c: 6},
            {c: []},
            {notRelevant: 3}
        ],
        multiKeyResults);

    const expl = coll.explain().distinct("a.b");
    // Pass the explain output as the assertion message so that a failure shows the chosen plan.
    assert.eq(true, planHasStage(db, getWinningPlan(expl.queryPlanner), "DISTINCT_SCAN"), expl);

    // Not running the same query with $group now that the field is multikey: $group treats an
    // array as a single key rather than unwinding it, so its results would differ from distinct().
})();

// Perform a distinct with a predicate on a path where the last component is multikey. The index
// is expected to be used through an IXSCAN rather than a DISTINCT_SCAN.
(function testDistinctOnPathWhereLastComponentIsMultiKeyWithPredicate() {
    assert.commandWorked(coll.createIndex({"a.b": 1}));
    const pred = {"a.b": {$type: "array"}};
    const multiKeyResults = coll.distinct("a.b", pred);
    assert.sameMembers(
        [
            {c: 4},
            {c: 5},
            {c: 6},
        ],
        multiKeyResults);

    const expl = coll.explain().distinct("a.b", pred);
    const winningPlan = getWinningPlan(expl.queryPlanner);
    // Pass the explain output as the assertion message so that a failure shows the chosen plan.
    assert.eq(false, planHasStage(db, winningPlan, "DISTINCT_SCAN"), expl);
    assert.eq(true, planHasStage(db, winningPlan, "IXSCAN"), expl);

    // Not running the same query with $group now that the field is multikey: $group treats an
    // array as a single key rather than unwinding it, so its results would differ from distinct().
})();

// When the distinct() path contains an array index, a COLLSCAN is expected, even though an index
// exists on the prefix leading to the array component ("a.b" in this case).
(function testDistinctOnNumericMultikeyPathNoIndex() {
    const positionalPath = "a.b.0";

    const distinctValues = coll.distinct(positionalPath);
    assert.eq(distinctValues, [{c: 4}]);

    const explainOutput = coll.explain().distinct(positionalPath);
    const chosenPlan = getWinningPlan(explainOutput.queryPlanner);
    assert.eq(true, planHasStage(db, chosenPlan, "COLLSCAN"), explainOutput);

    // No aggregation equivalent is attempted: $group by "a.b.0" would only treat '0' as a field
    // name (not as an array index).
})();

// Once an index on "a.b.0" exists, a distinct on that path should be able to use DISTINCT_SCAN.
(function testDistinctOnNumericMultikeyPathWithIndex() {
    const positionalPath = "a.b.0";

    assert.commandWorked(coll.createIndex({"a.b.0": 1}));
    // Add a document in which '0' is a field name of the sub-document "a.b".
    assert.commandWorked(coll.insert({a: {b: {0: "hello world"}}}));

    const distinctValues = coll.distinct(positionalPath);
    assert.sameMembers(distinctValues, [{c: 4}, "hello world"]);

    const explainOutput = coll.explain().distinct(positionalPath);
    const chosenPlan = getWinningPlan(explainOutput.queryPlanner);
    assert.eq(true, planHasStage(db, chosenPlan, "DISTINCT_SCAN"), explainOutput);

    // No aggregation equivalent is attempted: $group by "a.b.0" would only treat '0' as a field
    // name (not as an array index).
})();

// With an index on "a.b.0" in place, inserting an array at "a" makes "a" multikey. A distinct on
// "a.b.0" with a predicate should then use an IXSCAN, because a DISTINCT_SCAN cannot be used when
// the given path is multikey and a predicate is present.
(function testDistinctWithPredOnNumericMultikeyPathWithIndex() {
    const positionalPath = "a.b.0";
    const isObject = {"a.b.0": {$type: "object"}};

    const distinctValues = coll.distinct(positionalPath, isObject);
    assert.sameMembers(distinctValues, [{c: 4}]);

    // Make "a" multikey in order to ensure that a DISTINCT_SCAN plan on "a.b.0" is not legal.
    assert.commandWorked(coll.insert({a: [1, 2, 3]}));

    const explainOutput = coll.explain().distinct(positionalPath, isObject);
    const chosenPlan = getWinningPlan(explainOutput.queryPlanner);
    assert.eq(false, planHasStage(db, chosenPlan, "DISTINCT_SCAN"), explainOutput);
    assert.eq(true, planHasStage(db, chosenPlan, "IXSCAN"), explainOutput);

    // No aggregation equivalent is attempted: $group by "a.b.0" would only treat '0' as a field
    // name (not as an array index).
})();
})();