path: root/jstests/replsets/initial_sync_test_fixture_test.js
/**
 * Test to check that the Initial Sync Test Fixture properly pauses initial sync.
 *
 * The test checks that both the collection cloning and oplog application stages of initial sync
 * pause after exactly one command is run when the test fixture's step function is called. The test
 * issues the same listDatabases and listCollections commands that collection cloning does, so we
 * know all the commands that will be run on the sync source and can verify that only one is run per
 * call to step(). Similarly, for oplog application we can check the log messages to make sure that
 * the batches being applied are of the expected size and that only one batch was applied per step()
 * call.
 *
 * @tags: [
 *   uses_prepare_transaction,
 *   uses_transactions,
 * ]
 */

(function() {
"use strict";

load("jstests/core/txns/libs/prepare_helpers.js");
load("jstests/replsets/libs/initial_sync_test.js");
load("jstests/libs/logv2_helpers.js");

/**
 * Helper function to check that specific messages appeared or did not appear in the logs.
 */
function checkLogForMsg(node, msg, contains) {
    if (contains) {
        jsTest.log("Check for presence of message (" + node.port + "): |" + msg + "|");
        assert(checkLog.checkContainsOnce(node, msg));
    } else {
        jsTest.log("Check for absence of message (" + node.port + "): |" + msg + "|");
        assert(!checkLog.checkContainsOnce(node, msg));
    }
}

/**
 * Helper function to check that specific messages appeared or did not appear in the logs. If we
 * expect the log message to appear, this will show that the node is paused after getting the
 * specified timestamp.
 */
function checkLogForGetTimestampMsg(node, timestampName, timestamp, contains) {
    let msg = "Initial Syncer got the " + timestampName + ": { ts: " + tojson(timestamp);

    checkLogForMsg(node, msg, contains);
}

/**
 * Helper function to check that specific messages appeared or did not appear in the logs. If
 * the command was listIndexes and we expect the message to appear, we also add the collection
 * UUID to make sure that it corresponds to the expected collection.
 */
function checkLogForCollectionClonerMsg(node, commandName, dbname, contains, collUUID) {
    let msg =
        "Collection Cloner scheduled a remote command on the " + dbname + " db: { " + commandName;

    if (isJsonLog(node)) {
        msg = 'Collection Cloner scheduled a remote command","attr":{"stage":"' + dbname +
            " db: { " + commandName;
    }

    if (commandName === "listIndexes" && contains) {
        msg += ": " + collUUID;
        if (isJsonLog(node)) {
            msg = msg.replace('("', '(\\"').replace('")', '\\")');
        }
    }

    checkLogForMsg(node, msg, contains);
}

/**
 * Helper function to check that the specific message appeared exactly once in the logs and that
 * there is no other message saying that the next batch is about to be applied. This will show
 * that oplog application is paused.
 */
function checkLogForOplogApplicationMsg(node, size) {
    let msg = "Initial Syncer is about to apply the next oplog batch of size: ";
    checkLog.containsWithCount(node, msg, 1, 5 * 1000);

    msg += size;
    assert(checkLog.checkContainsOnce(node, msg));
}

// Set up Initial Sync Test.
const initialSyncTest = new InitialSyncTest();
const primary = initialSyncTest.getPrimary();
let secondary = initialSyncTest.getSecondary();
const db = primary.getDB("test");
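// A ~10MB filler string; documents containing it are inserted during collection cloning below so
// that oplog application later needs multiple batches.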
const largeString = 'z'.repeat(10 * 1024 * 1024);

assert.commandWorked(db.foo.insert({a: 1}));
assert.commandWorked(db.bar.insert({b: 1}));

// Prepare a transaction so that we know that step() can restart the secondary even if there is
// a prepared transaction. The prepareTimestamp will be used as the beginFetchingTimestamp and
// beginApplyingTimestamp during initial sync.
const session = primary.startSession({causalConsistency: false});
const sessionDB = session.getDatabase("test");
const sessionColl = sessionDB.getCollection("foo");
session.startTransaction();
assert.commandWorked(sessionColl.insert({c: 1}));
let prepareTimestamp = PrepareHelpers.prepareTransaction(session);

// This step call restarts the secondary and causes it to go into initial sync. It will pause
// initial sync after the node has fetched the defaultBeginFetchingTimestamp.
assert(!initialSyncTest.step());

secondary = initialSyncTest.getSecondary();
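// Allow this connection to read from a non-primary node; reads are still expected to fail with
// NotPrimaryOrSecondary while the node is in initial sync.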
secondary.setSecondaryOk();

// Make sure that we cannot read from this node yet.
assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}),
                             ErrorCodes.NotPrimaryOrSecondary);

// Make sure that we see that the node got the defaultBeginFetchingTimestamp, but hasn't gotten the
// beginFetchingTimestamp yet.
checkLogForGetTimestampMsg(secondary, "defaultBeginFetchingTimestamp", prepareTimestamp, true);
checkLogForGetTimestampMsg(secondary, "beginFetchingTimestamp", prepareTimestamp, false);
checkLogForGetTimestampMsg(secondary, "beginApplyingTimestamp", prepareTimestamp, false);

// This step call will resume initial sync and pause it again after the node gets the
// beginFetchingTimestamp from its sync source.
assert(!initialSyncTest.step());

// Make sure that we cannot read from this node yet.
assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}),
                             ErrorCodes.NotPrimaryOrSecondary);

// Make sure that we see that the node got the beginFetchingTimestamp, but hasn't gotten the
// beginApplyingTimestamp yet.
checkLogForGetTimestampMsg(secondary, "defaultBeginFetchingTimestamp", prepareTimestamp, false);
checkLogForGetTimestampMsg(secondary, "beginFetchingTimestamp", prepareTimestamp, true);
checkLogForGetTimestampMsg(secondary, "beginApplyingTimestamp", prepareTimestamp, false);

// This step call will resume initial sync and pause it again after the node gets the
// beginApplyingTimestamp from its sync source.
assert(!initialSyncTest.step());

// Make sure that we cannot read from this node yet.
assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}),
                             ErrorCodes.NotPrimaryOrSecondary);

// Make sure that we see that the node got the beginApplyingTimestamp, but that we don't see the
// listDatabases call yet.
checkLogForGetTimestampMsg(secondary, "defaultBeginFetchingTimestamp", prepareTimestamp, false);
checkLogForGetTimestampMsg(secondary, "beginFetchingTimestamp", prepareTimestamp, false);
checkLogForGetTimestampMsg(secondary, "beginApplyingTimestamp", prepareTimestamp, true);
checkLogForCollectionClonerMsg(secondary, "listDatabases", "admin", false);

// This step call will resume initial sync and pause it again after the node gets the
// listDatabases result from its sync source.
assert(!initialSyncTest.step());

// Make sure that we cannot read from this node yet.
assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}),
                             ErrorCodes.NotPrimaryOrSecondary);

// Make sure that we saw the listDatabases call in the log messages, but didn't see any
// listCollections or listIndexes call.
checkLogForCollectionClonerMsg(secondary, "listDatabases", "admin", true);
checkLogForCollectionClonerMsg(secondary, "listCollections", "admin", false);
checkLogForCollectionClonerMsg(secondary, "listIndexes", "admin", false);

// Run the same listDatabases command that the CollectionCloner runs.
const databases =
    assert.commandWorked(primary.adminCommand({listDatabases: 1, nameOnly: true})).databases;

// Iterate over the databases and collections in the same order that the test fixture would so
// that we can check the log messages to make sure initial sync is paused as expected.
for (let dbObj of databases) {
    const dbname = dbObj.name;

    // We skip the local db during the collection cloning phase of initial sync.
    if (dbname === "local") {
        continue;
    }

    const database = primary.getDB(dbname);

    // Run the same listCollections command that the CollectionCloner runs.
    const res = assert.commandWorked(database.runCommand(
        {listCollections: 1, filter: {$or: [{type: "collection"}, {type: {$exists: false}}]}}));

    // Make sure that there is only one batch.
    assert.eq(NumberLong(0), res.cursor.id, res);

    const collectionsCursor = res.cursor;

    // For each database, CollectionCloner will first call listCollections.
    assert(!initialSyncTest.step());

    // Make sure that we cannot read from this node yet.
    assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}),
                                 ErrorCodes.NotPrimaryOrSecondary);

    // Make sure that we saw the listCollections call in the log messages, but didn't see a
    // listIndexes call.
    checkLogForCollectionClonerMsg(secondary, "listCollections", dbname, true);
    checkLogForCollectionClonerMsg(secondary, "listIndexes", "admin", false);

    for (let collectionObj of collectionsCursor.firstBatch) {
        assert(collectionObj.info, collectionObj);
        const collUUID = collectionObj.info.uuid;

        // For each collection, CollectionCloner will call listIndexes.
        assert(!initialSyncTest.step());

        // Make sure that we cannot read from this node yet.
        assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}),
                                     ErrorCodes.NotPrimaryOrSecondary);

        // Make sure that we saw the listIndexes call in the log messages, but didn't
        // see a listCollections call.
        checkLogForCollectionClonerMsg(secondary, "listIndexes", dbname, true, collUUID);
        checkLogForCollectionClonerMsg(secondary, "listCollections", "admin", false);

        // Perform large operations during collection cloning so that we will need multiple
        // batches during oplog application.
        assert.commandWorked(db.foo.insert({d: largeString}));
        assert.commandWorked(db.bar.insert({e: largeString}));
    }
}

// Check that we see the expected number of batches during oplog application.

// This batch should correspond to the 'prepare' op.
assert(!initialSyncTest.step());
checkLogForOplogApplicationMsg(secondary, 1);
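// The next batches apply the large inserts made during collection cloning. The 9/3 split is
// presumably driven by the applier's batch size limits, since each insert carries a ~10MB
// document (an assumption about the limits, not something this test asserts directly).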
assert(!initialSyncTest.step());
checkLogForOplogApplicationMsg(secondary, 9);
assert(!initialSyncTest.step());
checkLogForOplogApplicationMsg(secondary, 3);

assert(initialSyncTest.step(), "Expected initial sync to have completed, but it did not");

// Abort transaction so that the data consistency checks in stop() can run.
assert.commandWorked(session.abortTransaction_forTesting());

// Issue a w:2 write to make sure the secondary has replicated the abortTransaction oplog entry.
assert.commandWorked(primary.getDB("otherDB").otherColl.insert({x: 1}, {writeConcern: {w: 2}}));

// Confirm that the node can be read from and that it has the inserts that were made while it
// was in initial sync.
assert.eq(secondary.getDB("test").foo.find().count(), 7);
assert.eq(secondary.getDB("test").bar.find().count(), 7);
assert.eq(secondary.getDB("test").foo.find().itcount(), 7);
assert.eq(secondary.getDB("test").bar.find().itcount(), 7);

// Do data consistency checks at the end.
initialSyncTest.stop();
})();