summaryrefslogtreecommitdiff
path: root/jstests/replsets/nodes_eventually_sync_from_closer_data_center.js
blob: 8ba24bdaa9b40a94a0de0bb3a909f373d5fe89ca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/**
 * Tests that nodes that are syncing from a node in a far away data center will eventually choose to
 * sync from a node in a closer data center. We do this with a three node replica set (P, S1, and
 * S2). We create a data center for each node and configure delays such that P's data center is
 * significantly farther away from S2 than S1's data center. Finally, we verify that S2 will decide
 * to sync from S1, since S1's data center is closer.
 *
 * @tags: [
 * ]
 */

(function() {
"use strict";

// Helper libraries. 'configureFailPoint', 'forceSyncSource', 'DataCenter',
// 'delayMessagesBetweenDataCenters' and 'setLogVerbosity' are not defined in this file; they are
// presumably provided by these loads (TODO confirm which library exports which helper).
load("jstests/libs/fail_point_util.js");
load("jstests/replsets/libs/sync_source.js");
load('jstests/replsets/rslib.js');

// The test name doubles as the replica set name and as the database name used for writes.
const name = jsTestName();
const rst = new ReplSetTest({
    name,
    // Three nodes: the first keeps the default config, the other two are given priority 0.
    nodes: [{}, {rsConfig: {priority: 0}}, {rsConfig: {priority: 0}}],
    nodeOptions: {
        setParameter: {
            // Set 'maxNumSyncSourceChangesPerHour' to a high value to remove the limit on how many
            // times nodes change sync sources in an hour.
            maxNumSyncSourceChangesPerHour: 99,
        }
    },
    settings: {
        // Set the heartbeat interval to a low value to reduce the amount of time spent waiting for
        // a heartbeat from sync source candidates.
        heartbeatIntervalMillis: 250,
    },
    // NOTE(review): presumably required so that message delays can be injected between nodes by
    // 'delayMessagesBetweenDataCenters' below -- confirm against ReplSetTest.
    useBridge: true
});

rst.startSet();
rst.initiateWithHighElectionTimeout();

// Node roles used throughout the test:
//   primary          - P in the header comment; placed in the "west" data center.
//   centralSecondary - S1; the sync source 'testNode' is expected to end up with.
//   testNode         - S2; the node whose sync source selection is under test.
const primary = rst.getPrimary();
const centralSecondary = rst.getSecondaries()[0];
const testNode = rst.getSecondaries()[1];

// The default WC is majority and this test can't satisfy majority writes.
// The command below lowers the cluster-wide default write concern to {w: 1}; the command itself
// is issued with a majority write concern.
assert.commandWorked(primary.adminCommand(
    {setDefaultRWConcern: 1, defaultWriteConcern: {w: 1}, writeConcern: {w: "majority"}}));
rst.awaitReplication();

const primaryDB = primary.getDB(name);
const primaryColl = primaryDB["testColl"];

// Initial write acknowledged by all three nodes, so every node starts from the same state.
assert.commandWorked(primaryColl.insert({"steady": "state"}, {writeConcern: {w: 3}}));

// Ensure we see the sync source progress logs.
setLogVerbosity(rst.nodes, {"replication": {"verbosity": 2}});

// Record the baseline value of the metric on 'testNode' so that the final assertion can check
// that it was incremented by exactly one during this test.
let serverStatus = assert.commandWorked(testNode.adminCommand({serverStatus: 1})).metrics.repl;
const numSyncSourceChanges =
    serverStatus.syncSource.numSyncSourceChangesDueToSignificantlyCloserNode;

jsTestLog("Forcing sync sources for the secondaries");
// The return value for 'centralSecondary' is discarded: its forced sync source is never turned
// off in this test.
forceSyncSource(rst, centralSecondary, primary);
// Force the test node to sync from the primary in the far away data center.
// The returned handle is kept so the forcing can be switched off later via '.off()'.
const testNodeForceSyncSource = forceSyncSource(rst, testNode, primary);

// One data center per node.
const westDC = new DataCenter("west", [primary]);
const centralDC = new DataCenter("centralDC", [centralSecondary]);
const eastDC = new DataCenter("east", [testNode]);

// Set the delay for adjacent data centers to 50 ms. Since the west data center and east data center
// are further apart, we delay messages between them by 300 ms.
delayMessagesBetweenDataCenters(westDC, centralDC, 50 /* delayMillis */);
delayMessagesBetweenDataCenters(centralDC, eastDC, 50 /* delayMillis */);
delayMessagesBetweenDataCenters(westDC, eastDC, 300 /* delayMillis */);

// Hang 'testNode' in the oplog fetcher to ensure that sync source candidates are ahead of us.
const hangOplogFetcherBeforeAdvancingLastFetched =
    configureFailPoint(testNode, "hangOplogFetcherBeforeAdvancingLastFetched");

// Do a write to reduce the time spent waiting for a batch.
// Only two acknowledgements are requested here, unlike the {w: 3} write above.
assert.commandWorked(primaryColl.insert({"make": "batch"}, {writeConcern: {w: 2}}));

// Order matters here: first wait until 'testNode' has actually reached the fail point, and only
// then stop forcing its sync source.
hangOplogFetcherBeforeAdvancingLastFetched.wait();
testNodeForceSyncSource.off();

// Perform one more write and remember the 'operationTime' from the command response; the
// heartbeat check below uses it as the minimum timestamp the target sync source must report.
const advancedTimestamp =
    assert.commandWorked(primaryColl.runCommand("insert", {documents: [{"advance": "timestamp"}]}))
        .operationTime;
jsTestLog(
    `Waiting for 'testNode' to receive heartbeats. The target sync source should have advanced its optime to ${
        tojson(advancedTimestamp)}`);
// Poll 'testNode' until both heartbeat-derived conditions below hold at the same time.
assert.soon(() => {
    const replSetGetStatus = assert.commandWorked(testNode.adminCommand({replSetGetStatus: 1}));

    // NOTE(review): the 'members' indices below are hard-coded. This assumes members[0] is the
    // primary (the test node's current sync source) and members[1] is 'centralSecondary' --
    // confirm that the member order always matches the node order passed to ReplSetTest.

    // Wait for a heartbeat from the target sync source that shows that the target sync source's
    // last timestamp is at least 'advancedTimestamp'. This ensures the test node sees that the
    // target sync source is ahead of itself, and as a result, it can decide to sync from the target
    // sync source.
    const centralTimestamp = replSetGetStatus.members[1].optime.ts;
    const receivedCentralHb = (bsonWoCompare(centralTimestamp, advancedTimestamp) >= 0);

    // Wait for enough heartbeats from the test node's current sync source so that our understanding
    // of the ping time is over 60 ms. This makes it likely to re-evaluate the sync source.
    const syncSourcePingTime = replSetGetStatus.members[0].pingMs;
    const receivedSyncSourceHb = (syncSourcePingTime > 60);

    return (receivedCentralHb && receivedSyncSourceHb);
});

// Log the test node's view of the replica set at the moment both conditions were satisfied, to
// aid debugging if the sync source change below does not happen.
const replSetGetStatus = assert.commandWorked(testNode.adminCommand({replSetGetStatus: 1}));
jsTestLog(replSetGetStatus);

// Turn the fail point off so the oplog fetcher on 'testNode' is no longer held.
hangOplogFetcherBeforeAdvancingLastFetched.off();

jsTestLog("Verifying that the node eventually syncs from centralSecondary");
rst.awaitSyncSource(testNode, centralSecondary);

// Verify that the metric was incremented correctly.
// Exactly one additional change is expected relative to the baseline captured earlier.
serverStatus = assert.commandWorked(testNode.adminCommand({serverStatus: 1})).metrics.repl;
assert.eq(numSyncSourceChanges + 1,
          serverStatus.syncSource.numSyncSourceChangesDueToSignificantlyCloserNode);

rst.stopSet();
})();