// path: root/jstests/sharding/merge_does_not_force_pipeline_split.js
// blob: ce9195379510720ef7a28310651a76b1fe9d5ebe (plain)
// Tests that a $merge stage does not force a pipeline to split into a "shards part" and a "merging
// part" if no other stage in the pipeline would force such a split.
(function() {
"use strict";

load("jstests/aggregation/extras/merge_helpers.js");  // For withEachMergeMode.

// Sharded test fixture configured with two shards ('shards: 2') and 'rs: {nodes: 1}'.
const st = new ShardingTest({shards: 2, rs: {nodes: 1}});

// Every collection used by this test lives in "test_db" and is accessed through 'st.s'.
const mongosDB = st.s.getDB("test_db");

// Source collection for every $merge pipeline in this test.
const inColl = mongosDB["inColl"];
// Two different output collections will be sharded by different keys.
const outCollById = mongosDB["outCollById"];
const outCollBySK = mongosDB["outCollBySK"];
// 'outCollById' is keyed on {_id: 1} and 'outCollBySK' on {sk: 1}.
// NOTE(review): the two {...: 500} arguments are presumably the split point and the chunk to
// move to the other shard -- confirm against ShardingTest.shardColl.
st.shardColl(outCollById, {_id: 1}, {_id: 500}, {_id: 500}, mongosDB.getName());
st.shardColl(outCollBySK, {sk: 1}, {sk: 500}, {sk: 500}, mongosDB.getName());
// Number of documents insertData() writes, and the document count each output collection is
// expected to hold after a $merge has been executed.
const numDocs = 1000;

// Writes 'numDocs' documents into 'coll' through a single unordered bulk operation and asserts
// that the write succeeded. The i-th document is {_id: i, sk: numDocs - i}, so '_id' counts up
// from 0 while 'sk' counts down from 'numDocs'.
function insertData(coll) {
    const batch = coll.initializeUnorderedBulkOp();
    Array.from({length: numDocs}, (unused, idx) => ({_id: idx, sk: numDocs - idx}))
        .forEach(doc => batch.insert(doc));
    const writeResult = batch.execute();
    assert.commandWorked(writeResult);
}

// Shard the input collection on {_id: 1}, passing the same arguments that were used for
// 'outCollById'.
st.shardColl(inColl, {_id: 1}, {_id: 500}, {_id: 500}, mongosDB.getName());

// Populate the input collection with 'numDocs' documents (see insertData).
insertData(inColl);

// Asserts that 'explain' contains a 'splitPipeline' whose 'shardsPart' holds exactly one $merge
// stage and whose 'mergerPart' is present but empty. The serialized explain output is attached
// to every assertion as its failure message.
function assertMergeRunsOnShards(explain) {
    const explainDump = tojson(explain);

    assert(explain.hasOwnProperty("splitPipeline"), explainDump);
    const split = explain.splitPipeline;

    assert(split.hasOwnProperty("shardsPart"), explainDump);
    const mergeStageCount = split.shardsPart.reduce(
        (count, stage) => (stage.hasOwnProperty("$merge") ? count + 1 : count), 0);
    assert.eq(mergeStageCount, 1, explainDump);

    assert(split.hasOwnProperty("mergerPart"), explainDump);
    assert.eq([], split.mergerPart, explainDump);
}

// Test that a simple $merge can run in parallel. Note that we still expect a 'splitPipeline' in
// the explain output, but the merging half should be empty to indicate that the entire thing is
// executing in parallel on the shards.

withEachMergeMode(({whenMatchedMode, whenNotMatchedMode}) => {
    // Start every mode combination from empty output collections.
    assert.commandWorked(outCollById.remove({}));
    assert.commandWorked(outCollBySK.remove({}));

    // Builds a fresh single-stage $merge pipeline targeting 'outColl' with the modes under test.
    // A new array is created on every call so the explain and the real run never share state.
    const makeMergePipeline = (outColl) => [{
        $merge: {
            into: outColl.getName(),
            whenMatched: whenMatchedMode,
            whenNotMatched: whenNotMatchedMode
        }
    }];

    // We expect the test to succeed for all $merge modes. However, the 'whenNotMatched: fail'
    // mode will cause the test to fail if the source collection has a document without a match
    // in the target collection. Similarly 'whenNotMatched: discard' will fail the assertion
    // below for the expected number of documents in the target collection. So for these modes we
    // populate the target collection with the same documents as in the source.
    const needsPrepopulatedTarget =
        whenNotMatchedMode === "fail" || whenNotMatchedMode === "discard";

    // Run identical checks against both targets: first the output collection sharded by the
    // same key as the input collection, then the one whose shard key differs from the input
    // collection's.
    for (const outColl of [outCollById, outCollBySK]) {
        // The explain must show the $merge running on the shards, and the target must still be
        // empty afterwards.
        const explain = inColl.explain().aggregate(makeMergePipeline(outColl));
        assertMergeRunsOnShards(explain);
        assert.eq(outColl.find().itcount(), 0);

        if (needsPrepopulatedTarget) {
            insertData(outColl);
        }

        // Actually execute the pipeline and make sure it works as expected.
        assert.doesNotThrow(() => inColl.aggregate(makeMergePipeline(outColl)));
        assert.eq(outColl.find().itcount(), numDocs);
    }
});

// Stop the ShardingTest fixture created at the top of this test.
st.stop();
}());