path: root/jstests/sharding/autosplit_heuristics.js
/**
 * Tests autosplitting heuristics, and that the heuristic counting of chunk sizes
 * works as expected even after splitting.
 *
 * This test is labeled resource intensive because its total io_write is 53MB compared to a median
 * of 5MB across all sharding tests in wiredTiger.
 * @tags: [resource_intensive]
 */
(function() {
'use strict';
load('jstests/sharding/autosplit_include.js');

var st = new ShardingTest({shards: 1, mongos: 1, other: {chunkSize: 1, enableAutoSplit: true}});
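// 'chunkSize' is specified in MB, so chunks are capped at ~1MB; 'enableAutoSplit' turns on the
// automatic chunk splitting that this test exercises.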

// The balancer is stopped by default in ShardingTest, so it will not interfere with the chunk
// moves/splits in a timing-dependent way.

// The test is not valid for debug builds, since debug reload behavior mangles the heuristics.
var isDebugBuild = st.s0.getDB("admin").serverBuildInfo().debug;

if (!isDebugBuild) {
    var mongos = st.s0;
    var config = mongos.getDB("config");
    var admin = mongos.getDB("admin");
    var coll = mongos.getCollection("foo.hashBar");

    assert.commandWorked(admin.runCommand({enableSharding: coll.getDB() + ""}));
    assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {_id: 1}}));

    var numChunks = 10;

    // Split off the low and high chunks, to get non-special-case behavior
    assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 0}}));
    assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: numChunks + 1}}));

    // Split all the other chunks, and an extra chunk. We need the extra chunk to compensate for
    // the fact that the chunk differ resets the highest chunk's (i.e. the last-split-chunk's)
    // data count on reload.
    for (var i = 1; i < numChunks + 1; i++) {
        assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: i}}));
    }

    jsTest.log("Setup collection...");
    st.printShardingStatus(true);
    var pad = (new Array(1024)).join(' ');
    var approxSize = Object.bsonsize({_id: 0.0, pad: pad});
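    // Each padded document is roughly 1KB: a 1023-character pad plus _id and BSON field overhead.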

    jsTest.log("Starting inserts of approx size: " + approxSize + "...");

    var chunkSizeBytes = 1024 * 1024;
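    // Matches the 1MB chunk size configured for the ShardingTest above.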

    // We insert slightly more than the max number of docs per chunk, to test
    // whether resetting the chunk size happens during reloads. If the size is
    // reset, we'd expect fewer splits, since the first split would then
    // disable further splits (statistically, since the decision is randomized).
    // We choose 1.4 since split attempts happen about once every 1/5 of chunkSize,
    // and we want to be sure we definitely get a split attempt at a full chunk.
    var insertsForSplit = Math.ceil((chunkSizeBytes * 1.4) / approxSize);
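    // With ~1KB docs this works out to roughly 1400 inserts per chunk, i.e. about 1.4MB of data
    // per chunk versus the 1MB chunk size.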
    var totalInserts = insertsForSplit * numChunks;

    printjson({
        chunkSizeBytes: chunkSizeBytes,
        insertsForSplit: insertsForSplit,
        totalInserts: totalInserts
    });

    // Insert enough docs to trigger splits into all chunks
    for (var i = 0; i < totalInserts; i++) {
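        // The key places doc i in bucket (i % numChunks) and adds a unique fractional offset
        // (i / totalInserts < 1), so each of the numChunks active chunks fills evenly with
        // distinct _id values.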
        assert.commandWorked(coll.insert({_id: i % numChunks + (i / totalInserts), pad: pad}));
        // Splitting is asynchronous so we should wait after each insert
        // for autosplitting to happen
        waitForOngoingChunkSplits(st);
    }

    jsTest.log("Inserts completed...");

    st.printShardingStatus(true);
    printjson(coll.stats());

    // Check that each of the numChunks chunks receiving inserts has been split at least once.
    // The manual splits created numChunks + 3 chunks (split points at 0..numChunks + 1, i.e. the
    // two extreme chunks plus the extra reload-buffer chunk), and each autosplit adds at least
    // one more, so we expect at least numChunks * 2 + 3 chunks.
    assert.gte(config.chunks.count({"ns": "foo.hashBar"}), numChunks * 2 + 3);

    jsTest.log("DONE!");

} else {
    jsTest.log("Disabled test in debug builds.");
}

st.stop();
})();