path: root/jstests/replsets/server_status_metrics.js
/**
 * Tests replication-related serverStatus metrics on a secondary, and
 * getLastError.wtime metrics on the primary.
 */
function _testSecondaryMetricsHelper(secondary, opCount, baseOpsApplied, baseOpsReceived) {
    var ss = secondary.getDB("test").serverStatus();
    printjson(ss.metrics);

    assert(ss.metrics.repl.network.readersCreated > 0, "no (oplog) readers created");
    assert(ss.metrics.repl.network.getmores.num > 0, "no getmores");
    assert(ss.metrics.repl.network.getmores.totalMillis > 0, "no getmores time");
    // The first oplog entry may or may not make it into network.ops now that we have two
    // 'n' ops (initiate and new primary) before steady replication starts.
    // Sometimes we disconnect from our sync source, and since our find is a $gte query, we may
    // double count an oplog entry, so we need some wiggle room for that.
    assert.lte(ss.metrics.repl.network.ops, opCount + baseOpsReceived + 5, "too many ops retrieved");
    assert.gte(ss.metrics.repl.network.ops, opCount + baseOpsReceived, "too few ops retrieved");
    assert(ss.metrics.repl.network.bytes > 0, "zero or missing network bytes");

    assert(ss.metrics.repl.buffer.count >= 0, "buffer count missing");
    assert(ss.metrics.repl.buffer.sizeBytes >= 0, "size (bytes) missing");
    assert(ss.metrics.repl.buffer.maxSizeBytes >= 0, "maxSize (bytes) missing");

    assert(ss.metrics.repl.preload.docs.num >= 0, "preload.docs num missing");
    assert(ss.metrics.repl.preload.docs.totalMillis >= 0, "preload.docs time missing");
    assert(ss.metrics.repl.preload.indexes.num >= 0, "preload.indexes num missing");
    assert(ss.metrics.repl.preload.indexes.totalMillis >= 0, "preload.indexes time missing");

    assert.eq(ss.metrics.repl.apply.batchSize, opCount + baseOpsApplied, "apply ops batch size mismatch");
    assert(ss.metrics.repl.apply.batches.num > 0, "no batches");
    assert(ss.metrics.repl.apply.batches.totalMillis >= 0, "missing batch time");
    assert.eq(ss.metrics.repl.apply.ops, opCount + baseOpsApplied, "wrong number of applied ops");
}
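
// For reference, the helper above inspects a serverStatus() metrics subdocument
// shaped roughly like the sketch below. The field names are exactly those
// asserted on; the sample values are illustrative only, not from a real run.
//
//     "repl": {
//         "network": {
//             "readersCreated": 2,
//             "getmores": {"num": 50, "totalMillis": 120},
//             "ops": 2005,
//             "bytes": 180000
//         },
//         "buffer": {"count": 0, "sizeBytes": 0, "maxSizeBytes": 268435456},
//         "preload": {
//             "docs": {"num": 0, "totalMillis": 0},
//             "indexes": {"num": 0, "totalMillis": 0}
//         },
//         "apply": {
//             "batchSize": 2005,
//             "batches": {"num": 80, "totalMillis": 40},
//             "ops": 2005
//         }
//     }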

// Metrics are racy, e.g. repl.buffer.count could be briefly over- or under-reported. Retry on
// error.
function testSecondaryMetrics(secondary, opCount, baseOpsApplied, baseOpsReceived) {
    assert.soon(() => {
        try {
            _testSecondaryMetricsHelper(secondary, opCount, baseOpsApplied, baseOpsReceived);
            return true;
        } catch (exc) {
            jsTestLog(`Caught ${exc}, retrying`);
            return false;
        }
    });
}

var rt = new ReplSetTest({name: "server_status_metrics", nodes: 2, oplogSize: 100});
rt.startSet();
rt.initiate();

rt.awaitSecondaryNodes();

var secondary = rt.getSecondary();
var primary = rt.getPrimary();
var testDB = primary.getDB("test");

assert.commandWorked(testDB.createCollection('a'));
assert.writeOK(testDB.b.insert({}, {writeConcern: {w: 2}}));

var ss = secondary.getDB("test").serverStatus();
var secondaryBaseOplogOpsApplied = ss.metrics.repl.apply.ops;
var secondaryBaseOplogOpsReceived = ss.metrics.repl.network.ops;

// add test docs
var bulk = testDB.a.initializeUnorderedBulkOp();
for (var x = 0; x < 1000; x++) {
    bulk.insert({});
}
assert.writeOK(bulk.execute({w: 2}));

testSecondaryMetrics(secondary, 1000, secondaryBaseOplogOpsApplied, secondaryBaseOplogOpsReceived);

var options = {writeConcern: {w: 2}, multi: true, upsert: true};
assert.writeOK(testDB.a.update({}, {$set: {d: new Date()}}, options));

testSecondaryMetrics(secondary, 2000, secondaryBaseOplogOpsApplied, secondaryBaseOplogOpsReceived);

// Test getLastError.wtime, and that it only records stats for writes with w > 1; see SERVER-9005.
var startMillis = testDB.serverStatus().metrics.getLastError.wtime.totalMillis;
var startNum = testDB.serverStatus().metrics.getLastError.wtime.num;
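
// For reference, the wtime subdocument read above and below is shaped roughly
// like this (sample values illustrative only, not from a real run):
//
//     "getLastError": {"wtime": {"num": 42, "totalMillis": 1337}}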

printjson(primary.getDB("test").serverStatus().metrics);

// A w:1 write should not record wtime stats.
assert.writeOK(testDB.a.insert({x: 1}, {writeConcern: {w: 1, wtimeout: 5000}}));
assert.eq(testDB.serverStatus().metrics.getLastError.wtime.totalMillis, startMillis);
assert.eq(testDB.serverStatus().metrics.getLastError.wtime.num, startNum);

// Nor should a write with a nonsensical negative w value.
assert.writeOK(testDB.a.insert({x: 1}, {writeConcern: {w: -11, wtimeout: 5000}}));
assert.eq(testDB.serverStatus().metrics.getLastError.wtime.totalMillis, startMillis);
assert.eq(testDB.serverStatus().metrics.getLastError.wtime.num, startNum);

// A w:2 write waits for the secondary and should record wtime stats.
assert.writeOK(testDB.a.insert({x: 1}, {writeConcern: {w: 2, wtimeout: 5000}}));
assert(testDB.serverStatus().metrics.getLastError.wtime.totalMillis >= startMillis);
assert.eq(testDB.serverStatus().metrics.getLastError.wtime.num, startNum + 1);

// A w:3 write will time out because the set has only two nodes; the failed wait still records
// wtime stats.
assert.writeError(testDB.a.insert({x: 1}, {writeConcern: {w: 3, wtimeout: 50}}));
assert.eq(testDB.serverStatus().metrics.getLastError.wtime.num, startNum + 2);

printjson(primary.getDB("test").serverStatus().metrics);

rt.stopSet();