summaryrefslogtreecommitdiff
path: root/jstests/replsets/initial_sync_fails_unclean_restart.js
blob: bce44ec1d52a2783989799e5ab5bc7b8299056ce (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/**
 * Tests that initial sync aborts an attempt if the sync source restarts after an unclean
 * shutdown, and that the sync source node increments its rollback id after that shutdown.
 *
 * This is to test resumable initial sync behavior when the sync source restarts after an unclean
 * shutdown. See SERVER-50140 for more details.
 * @tags: [requires_persistence]
 */
(function() {
"use strict";

load("jstests/libs/fail_point_util.js");
load("jstests/replsets/rslib.js");

const kDbName = "test";
const kCollName = "coll";
// Signal number handed to ReplSetTest.stop() to kill the sync source.
const kKillSignal = 9;
// Minimum number of "Trying to reconnect" log lines to wait for while the sync source is down.
const kMinReconnectAttempts = 2;

// Turns off the named fail point on 'conn' and asserts that the command succeeded.
const disableFailPoint = (conn, failPointName) =>
    assert.commandWorked(conn.adminCommand({configureFailPoint: failPointName, mode: 'off'}));

// Returns whatever findOne() yields from local.system.rollback.id on 'conn'.
const readRollbackIdDoc = (conn) => conn.getDB("local").system.rollback.id.findOne();

// Start with a single-node replica set; that node acts as the sync source.
const replTest = new ReplSetTest({nodes: 1});
replTest.startSet();
replTest.initiate();

let syncSourceNode = replTest.getPrimary();
const syncSourceColl = syncSourceNode.getDB(kDbName)[kCollName];

// Seed the sync source with a few documents for the new node to clone.
const seedDocs = [{_id: 1}, {_id: 2}, {_id: 3}];
assert.commandWorked(syncSourceColl.insert(seedDocs));

jsTest.log("Adding a new node to the replica set");
const initialSyncNodeOptions = {
    rsConfig: {priority: 0, votes: 0},
    setParameter: {
        'failpoint.initialSyncHangBeforeCopyingDatabases': tojson({mode: 'alwaysOn'}),
        // Lets the test wait until the cloners have finished.
        'failpoint.initialSyncHangAfterDataCloning': tojson({mode: 'alwaysOn'}),
        // A single attempt, so one failed attempt fails initial sync as a whole.
        'numInitialSyncAttempts': 1,
    }
};
const initialSyncNode = replTest.add(initialSyncNodeOptions);
replTest.reInitiate();

jsTestLog("The initialSyncNode should hang before the database cloning phase");
checkLog.contains(initialSyncNode, "initialSyncHangBeforeCopyingDatabases fail point enabled");

// With the journal flusher paused, insert a document without asking for journaling. The write
// is expected to be lost once the syncSourceNode comes back from the unclean shutdown.
const journalFlusherFp = configureFailPoint(syncSourceNode, "pauseJournalFlusherThread");
journalFlusherFp.wait();
assert.commandWorked(syncSourceColl.insert({_id: 4}));

// Arrange for the initialSyncNode to pause just before initial sync finishes, so that the
// failure can be inspected through replSetGetStatus.
const hangBeforeFinishFp = configureFailPoint(initialSyncNode, "initialSyncHangBeforeFinish");

jsTestLog("Resuming database cloner on the initialSyncNode");
disableFailPoint(initialSyncNode, 'initialSyncHangBeforeCopyingDatabases');

jsTestLog("Waiting for data cloning to complete on the initialSyncNode");
const waitForCloningDoneCmd = {
    waitForFailPoint: "initialSyncHangAfterDataCloning",
    timesEntered: 1,
    maxTimeMS: kDefaultWaitForFailPointTimeout
};
assert.commandWorked(initialSyncNode.adminCommand(waitForCloningDoneCmd));

// Record the sync source's rollback id document prior to killing it.
const rollbackIdBefore = readRollbackIdDoc(syncSourceNode);

jsTestLog("Shutting down the syncSourceNode uncleanly");
const expectedExit = {allowedExitCode: MongoRunner.EXIT_SIGKILL};
const stopOptions = {forRestart: true, waitPid: true};
replTest.stop(syncSourceNode, kKillSignal, expectedExit, stopOptions);

// While the sync source is completely down, resumable initial sync should keep retrying rather
// than failing right away; wait until a couple of reconnect attempts have been logged.
checkLog.containsWithAtLeastCount(initialSyncNode, "Trying to reconnect", kMinReconnectAttempts);

// Bring the sync source back up and wait until it is primary again.
jsTestLog("Restarting the syncSourceNode");
const isRestart = true;
replTest.start(syncSourceNode, {waitForConnect: true}, isRestart);
syncSourceNode = replTest.getPrimary();

// The unclean shutdown must have bumped the rollback id by exactly one.
const rollbackIdAfter = readRollbackIdDoc(syncSourceNode);
const describeRollbackIds = () => `rollbackIdBefore: ${tojson(rollbackIdBefore)} rollbackIdAfter: ${
    tojson(rollbackIdAfter)}`;
assert.eq(rollbackIdAfter.rollbackId, rollbackIdBefore.rollbackId + 1, describeRollbackIds);

jsTestLog("Resuming initial sync after the data cloning phase on the initialSyncNode");
disableFailPoint(initialSyncNode, "initialSyncHangAfterDataCloning");

jsTestLog("Waiting for initial sync to fail on the initialSyncNode");
hangBeforeFinishFp.wait();
const statusRes = assert.commandWorked(initialSyncNode.adminCommand({replSetGetStatus: 1}));
// Exactly one initial sync attempt should be reported as failed.
const syncStatus = statusRes.initialSyncStatus;
assert.eq(syncStatus.failedInitialSyncAttempts, 1, () => tojson(syncStatus));
hangBeforeFinishFp.off();

// The failed node is expected to terminate with an fassert; wait for that exit and drop the
// node from the fixture so that stopSet() can shut down cleanly.
const initialSyncNodeExitCode = waitMongoProgram(initialSyncNode.port);
assert.eq(MongoRunner.EXIT_ABRUPT, initialSyncNodeExitCode);
replTest.remove(initialSyncNode);

replTest.stopSet();
})();