jstests/sharding/autosplit_heuristics.js


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86

//
// Tests autosplitting heuristics, and that the heuristic counting of chunk sizes
// works as expected even after splitting.
//

// Single-shard cluster with a 1MB chunk size and autosplit enabled on the mongos, so that
// every split observed below is the result of the mongos autosplit heuristics.
var st = new ShardingTest({
    shards: 1,
    mongos: 1,
    other: {mongosOptions: {chunkSize: 1, verbose: 2}, enableAutoSplit: true}
});

// The balancer is by default stopped, thus it will NOT interfere unpredictably with the chunk
// moves/splits depending on the timing.

// Test is not valid for debug build, heuristics get all mangled by debug reload behavior
var isDebugBuild = st.s0.getDB("admin").serverBuildInfo().debug;

if (!isDebugBuild) {
    var mongos = st.s0;
    var config = mongos.getDB("config");
    var admin = mongos.getDB("admin");
    var coll = mongos.getCollection("foo.hashBar");

    // Every setup command below must succeed: a silently failed split would change the chunk
    // layout and make the final chunk-count assertion meaningless. assert.commandWorked returns
    // the command response, so the responses are still printed exactly as before.
    printjson(assert.commandWorked(admin.runCommand({enableSharding: coll.getDB() + ""})));
    printjson(
        assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {_id: 1}})));

    var numChunks = 10;

    // Split off the low and high chunks, to get non-special-case behavior
    printjson(assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 0}})));
    printjson(assert.commandWorked(
        admin.runCommand({split: coll + "", middle: {_id: numChunks + 1}})));

    // Split all the other chunks, and an extra chunk
    // We need the extra chunk to compensate for the fact that the chunk differ resets the highest
    // chunk's (i.e. the last-split-chunk's) data count on reload.
    // After this loop the layout is: (-inf, 0), [0, 1), ..., [numChunks, numChunks + 1),
    // [numChunks + 1, +inf), i.e. numChunks + 1 inner chunks plus the two extreme chunks.
    for (var i = 1; i < numChunks + 1; i++) {
        printjson(assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: i}})));
    }

    jsTest.log("Setup collection...");
    st.printShardingStatus(true);

    // Size of the smallest document inserted below (a document holding only a double _id).
    var approxSize = Object.bsonsize({_id: 0.0});

    jsTest.log("Starting inserts of approx size: " + approxSize + "...");

    // Must match the chunkSize (in MB) passed to the mongos above.
    var chunkSizeBytes = 1024 * 1024;

    // We insert slightly more than the max number of docs per chunk, to test
    // if resetting the chunk size happens during reloads.  If the size is
    // reset, we'd expect to split less, since the first split would then
    // disable further splits (statistically, since the decision is randomized).
    // We choose 1.4 since split attempts happen about once every 1/5 chunksize,
    // and we want to be sure we def get a split attempt at a full chunk.
    var insertsForSplit = Math.ceil((chunkSizeBytes * 1.4) / approxSize);
    var totalInserts = insertsForSplit * numChunks;

    printjson({
        chunkSizeBytes: chunkSizeBytes,
        insertsForSplit: insertsForSplit,
        totalInserts: totalInserts
    });

    // Insert enough docs to trigger splits into all chunks
    // The integer part (i % numChunks) round-robins the documents over the chunks
    // [0, 1) ... [numChunks - 1, numChunks); the fractional part (i / totalInserts) is in [0, 1)
    // and keeps every _id unique without moving the document out of its chunk.
    var bulk = coll.initializeUnorderedBulkOp();
    for (var i = 0; i < totalInserts; i++) {
        bulk.insert({_id: i % numChunks + (i / totalInserts)});
    }
    assert.writeOK(bulk.execute());

    jsTest.log("Inserts completed...");

    st.printShardingStatus(true);
    printjson(coll.stats());

    // Check that all chunks (except the two extreme chunks)
    // have been split at least once + 1 extra chunk as reload buffer
    // i.e. numChunks * 2 (each populated chunk split in two) + 1 (buffer chunk) + 2 (extremes).
    assert.gte(config.chunks.count(),
               numChunks * 2 + 3,
               "expected every populated chunk to have been autosplit at least once");

    jsTest.log("DONE!");

} else {
    jsTest.log("Disabled test in debug builds.");
}

st.stop();