summaryrefslogtreecommitdiff
path: root/jstests/core/index/wildcard/wildcard_index_distinct_scan.js
blob: e56f81d5b626a99e73574382833afd94fe073efa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
/**
 * Tests that a $** index can provide a DISTINCT_SCAN or indexed solution where appropriate.
 * @tags: [
 *   assumes_read_concern_local,
 *   no_selinux,
 * ]
 */
(function() {
"use strict";

load("jstests/aggregation/extras/utils.js");  // For arrayEq.
load("jstests/libs/analyze_plan.js");         // For planHasStage and getPlanStages.

// Asserts that 'l' and 'r' are equal according to arrayEq(); on failure the message shows both
// values rendered with tojson().
const assertArrayEq = (l, r) => {
    const arraysMatch = arrayEq(l, r);
    const failureMessage = tojson(l) + " != " + tojson(r);
    return assert(arraysMatch, failureMessage);
};

// The collection that every test case in this file operates on. Any existing collection with
// this name is dropped before the test begins.
const coll = db.all_paths_distinct_scan;
coll.drop();

// Records whether the field which we are distinct-ing over is multikey. Reassigned by
// insertTestData() every time the dataset is rebuilt, and read by the test loop when choosing
// the scan type it expects for a query on the distinct field.
let distinctFieldIsMultikey = false;

// Rebuilds the test collection from scratch with 200 documents. Each document stores one entry
// of 'listOfValues' under 'fieldName' (cycling through the list in order), the index of that
// entry in 'b', and the negated document sequence number in 'c'. Because 'listOfValues' may mix
// values of many types, this lets us confirm that 'distinct' with a wildcard index (1) can
// return values of any type, (2) only returns the set of unique values, and (3) handles multikey
// values appropriately when 'listOfValues' includes an array.
//
// As a side effect, 'distinctFieldIsMultikey' is set to true iff any entry of 'listOfValues' is
// an array.
function insertTestData(fieldName, listOfValues) {
    distinctFieldIsMultikey = listOfValues.some(Array.isArray);

    const bulkOp = coll.initializeUnorderedBulkOp();
    coll.drop();

    const docs = Array.from({length: 200}, (unused, seq) => {
        const valueIdx = seq % listOfValues.length;
        // Unary minus is deliberate: it keeps document 0's 'c' as -0, exactly as before.
        return {[fieldName]: listOfValues[valueIdx], b: valueIdx, c: -seq};
    });
    docs.forEach((doc) => {
        bulkOp.insert(doc);
    });

    assert.commandWorked(bulkOp.execute());
}

/**
 * Runs a single wildcard 'distinct' scenario against 'coll'.
 *
 * The distinct is first executed with all indexes dropped, where it must plan as a COLLSCAN and
 * return 'expectedResults'. A $** index is then built (restricted by 'pathProjection' if one is
 * given) and the distinct is explained again:
 *  - If 'expectedPath' is null (or otherwise falsy), the winning plan must still contain a
 *    COLLSCAN and 'expectedScanType' must be "COLLSCAN"; nothing further is checked.
 *  - Otherwise the distinct must again return 'expectedResults', the winning plan must contain
 *    an 'expectedScanType' stage whose key pattern is {$_path: 1, <expectedPath>: 1}, and a
 *    FETCH stage must be present exactly when 'expectedScanType' is not "DISTINCT_SCAN".
 *
 * @param {Object} params
 * @param {string} params.distinctKey - Path passed to 'distinct'.
 * @param {Object} params.query - Filter passed to 'distinct'.
 * @param {Object} [params.pathProjection] - Optional 'wildcardProjection' for the $** index.
 * @param {string} params.expectedScanType - "COLLSCAN", "IXSCAN" or "DISTINCT_SCAN".
 * @param {Array} params.expectedResults - Values the distinct must return (order-insensitive).
 * @param {?string} params.expectedPath - Path the $** plan is expected to scan, or null.
 */
function assertWildcardDistinctScan(
    {distinctKey, query, pathProjection, expectedScanType, expectedResults, expectedPath}) {
    // Small local helpers so that each step below reads as a single statement.
    const explainWinningPlan = () =>
        getWinningPlan(coll.explain().distinct(distinctKey, query).queryPlanner);
    const hasStage = (plan, stageName) => planHasStage(coll.getDB(), plan, stageName);
    const assertDistinctResults = () =>
        assertArrayEq(coll.distinct(distinctKey, query), expectedResults);

    // Phase 1: with every index dropped, the distinct has to run as a COLLSCAN. This gives us a
    // baseline confirming that 'expectedResults' is correct independent of any index.
    assert.commandWorked(coll.dropIndexes());
    const unindexedPlan = explainWinningPlan();
    assert(hasStage(unindexedPlan, "COLLSCAN"));
    assertDistinctResults();

    // Phase 2: build the wildcard index and explain the same distinct again.
    const indexOptions = pathProjection ? {wildcardProjection: pathProjection} : {};
    assert.commandWorked(coll.createIndex({"$**": 1}, indexOptions));
    const wildcardPlan = explainWinningPlan();

    // Without an expected path, the $** index must not have produced an indexed plan.
    if (!expectedPath) {
        assert(hasStage(wildcardPlan, "COLLSCAN"));
        assert.eq(expectedScanType, "COLLSCAN");
        return;
    }

    // The indexed plan must return exactly what the collection scan returned.
    assertDistinctResults();

    // Expected outcomes for a 'distinct' that attempts to use a $** index:
    // - No query: COLLSCAN.
    // - Query for object value on distinct field: COLLSCAN.
    // - Query for non-object value on non-multikey distinct field: DISTINCT_SCAN.
    // - Query for non-object value on multikey distinct field: IXSCAN with FETCH.
    // - Query for non-object value on field other than the distinct field: IXSCAN with FETCH.
    // Hence only a DISTINCT_SCAN plan is expected to run without a FETCH.
    const fetchIsExpected = (expectedScanType !== "DISTINCT_SCAN");
    assert.eq(hasStage(wildcardPlan, "FETCH"), fetchIsExpected);
    assert(hasStage(wildcardPlan, expectedScanType));

    // The first stage of the expected type must scan the $** index on 'expectedPath'.
    const [firstScanStage] = getPlanStages(wildcardPlan, expectedScanType);
    assert.docEq({$_path: 1, [expectedPath]: 1}, firstScanStage.keyPattern);
}

// The set of distinct values that should be produced by each of the tests listed below when the
// query does not filter any value out. The test loop uses this list either in full or as a
// slice. Note that {c: 5, d: 6} and {d: 6, c: 5} are listed as two separate values.
const distinctValues = [1, 2, "3", null, {c: 5, d: 6}, {d: 6, c: 5}, {}, 9, 10, {e: 11}];

// Define the set of values that the distinct field may take. Each entry specifies:
//  - insertField: the top-level field under which each value is inserted;
//  - queryField: the (possibly dotted) path that 'distinct' and the queries are run against;
//  - fieldValues: the values cycled through when populating the collection.
// The four cases cover non-multikey and multikey data for both a top-level path ("a") and a
// dotted path ("a.x").
const testCases = [
    // Non-multikey field values.
    {
        insertField: "a",
        queryField: "a",
        fieldValues: [1, 2, "3", null, {c: 5, d: 6}, {d: 6, c: 5}, {}, 9, 10, {e: 11}]
    },
    // Multikey field values. Note that values within arrays are unwrapped by the distinct
    // scan, and empty arrays are thus not included.
    {
        insertField: "a",
        queryField: "a",
        fieldValues: [1, 2, "3", null, {c: 5, d: 6}, {d: 6, c: 5}, {}, [], [9, 10], [{e: 11}]]
    },
    // Non-multikey dotted field values.
    {
        insertField: "a",
        queryField: "a.x",
        fieldValues: [
            {x: 1},
            {x: 2},
            {x: "3"},
            {x: null},
            {x: {c: 5, d: 6}},
            {x: {d: 6, c: 5}},
            {x: {}},
            {x: 9},
            {x: 10},
            {x: {e: 11}}
        ]
    },
    // Multikey dotted field values.
    {
        insertField: "a",
        queryField: "a.x",
        fieldValues: [
            [{x: 1}],
            [{x: 2}],
            [{x: "3"}],
            [{x: null}],
            [{x: {c: 5, d: 6}}],
            [{x: {d: 6, c: 5}}],
            [{x: {}}],
            [{x: []}],
            [{x: 9}, {x: 10}],
            [{x: [{e: 11}]}]
        ]
    }
];

// Run all combinations of query, no-query, multikey and non-multikey distinct tests. For each
// dataset the scenarios are collected into a list and then executed in order.
for (const testCase of testCases) {
    const {insertField, queryField, fieldValues} = testCase;

    // Log the start of the test and create the dataset. This also refreshes
    // 'distinctFieldIsMultikey', which one of the scenarios below depends on.
    jsTestLog("Test case: " + tojson(testCase));
    insertTestData(insertField, fieldValues);

    // Field 'b' holds each document's index into 'fieldValues', so {b: {$gte: offset}} selects
    // the documents carrying the upper half of the values.
    const offset = Math.floor(fieldValues.length / 2);

    const scenarios = [
        // A $** index cannot provide an indexed 'distinct' without a query.
        {
            distinctKey: queryField,
            query: {},
            expectedScanType: "COLLSCAN",
            expectedResults: distinctValues,
            expectedPath: null
        },
        // A $** index can provide an indexed 'distinct' for distinct-key queries.
        {
            distinctKey: queryField,
            query: {[queryField]: {$lt: 3}},
            expectedScanType: (distinctFieldIsMultikey ? "IXSCAN" : "DISTINCT_SCAN"),
            expectedResults: [1, 2],
            expectedPath: queryField
        },
        // A $** index can provide an indexed 'distinct' for a query on another field.
        {
            distinctKey: queryField,
            query: {b: {$gte: offset}},
            expectedScanType: "IXSCAN",
            expectedResults: distinctValues.slice(offset),
            expectedPath: "b"
        },
        // A $** index cannot provide an indexed 'distinct' for object value queries.
        {
            distinctKey: queryField,
            query: {[queryField]: {$gte: {c: 5}}},
            expectedScanType: "COLLSCAN",
            expectedResults: [{c: 5, d: 6}, {d: 6, c: 5}, {e: 11}],
            expectedPath: null
        },
        // A $** index can provide an indexed 'distinct' for a MinMax query.
        {
            distinctKey: queryField,
            query: {[queryField]: {$gte: MinKey, $lte: MaxKey}},
            expectedScanType: "IXSCAN",
            expectedResults: distinctValues,
            expectedPath: queryField
        },
        // A $** index cannot provide an indexed 'distinct' for excluded fields.
        {
            distinctKey: queryField,
            query: {c: {$lt: 0}},
            pathProjection: {c: 0},
            expectedScanType: "COLLSCAN",
            expectedResults: distinctValues,
            expectedPath: null
        }
    ];

    for (const scenario of scenarios) {
        assertWildcardDistinctScan(scenario);
    }
}
})();