summaryrefslogtreecommitdiff
path: root/jstests/sharding/health_monitor/progress_monitor.js
blob: 4ef7092868541c0c9d6bb335cea6048fb51e0af9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
/*
 *  @tags: [multiversion_incompatible]
 */
// Number of seconds used as the progress monitor `deadline`, and as the time the test waits for
// that deadline to elapse. The same value is also passed as the monitor's `interval`.
// NOTE(review): confirm the server interprets `interval` and `deadline` in the same units.
const PROGRESS_TIMEOUT_SECONDS = 5;
// Number of seconds to let the failpoint sit before checking that the mongos is still alive.
const CHECK_PING_SECONDS = 1;
(function() {
'use strict';

// Sends a `ping` admin command over the given connection and returns the raw response.
const ping = (conn) => conn.adminCommand({ping: 1});

// Startup options shared by both mongos processes: the "test" health observer runs at
// non-critical intensity every 500 (NumberInt) interval units, the "ldap" and "dns" observers are
// switched off, and the progress monitor is configured from PROGRESS_TIMEOUT_SECONDS.
const params = {
    setParameter: {
        healthMonitoringIntensities: tojson({
            values: [
                {type: "test", intensity: "non-critical"},
                {type: "ldap", intensity: "off"},
                {type: "dns", intensity: "off"}
            ]
        }),
        healthMonitoringIntervals: tojson({values: [{type: "test", interval: NumberInt(500)}]}),
        progressMonitor:
            tojson({interval: PROGRESS_TIMEOUT_SECONDS, deadline: PROGRESS_TIMEOUT_SECONDS}),
        featureFlagHealthMonitoring: true
    }
};
const st = new ShardingTest({
    mongos: [params, params],
    shards: 1,
});
// After cluster startup, make sure both mongos's are available.
assert.commandWorked(ping(st.s0));
assert.commandWorked(ping(st.s1));
assert.commandWorked(st.s1.adminCommand(
    {setParameter: 1, logComponentVerbosity: {processHealth: {verbosity: 2}}}));

// Set the failpoint on one of the mongos's to pause its healthchecks.
assert.commandWorked(
    st.s1.adminCommand({configureFailPoint: 'hangTestHealthObserver', mode: "alwaysOn"}));
sleep(CHECK_PING_SECONDS * 1000);
// Make sure the failpoint on its own doesn't bring down the server.
assert.commandWorked(ping(st.s1));
// Wait for the progress monitor timeout to elapse.
sleep(PROGRESS_TIMEOUT_SECONDS * 1000);

assert.soon(() => {
    // s0 must remain healthy for the whole test. This check deliberately lives outside the try
    // block below, so that a failure here (including a network error) is reported as a failure
    // of s0 rather than being mistaken for s1 going down.
    assert.commandWorked(ping(st.s0));

    try {
        // This should throw an error once s1 is no longer reachable.
        ping(st.s1);
    } catch (e) {
        // This might seem brittle to rely on the string message for the error, but the same check
        // appears in the implementation for runCommand().
        if (e.message.includes("network error")) {
            return true;
        }
        jsTestLog(`Failure: ${e}`);
        sleep(1000);
        return false;
    }

    // s1 still answered the ping; log it and retry after a short pause.
    jsTestLog("Failure: ping command to s1 should fail.");
    sleep(1000);
    return false;
}, "Pinging faulty mongos should fail with network error.", PROGRESS_TIMEOUT_SECONDS * 1000);
// Don't validate exit codes, since a mongos will exit on its own with a non-zero exit code.

st.stop({skipValidatingExitCode: true});
})();