summaryrefslogtreecommitdiff
path: root/jstests/replsets/tenant_migration_recipient_fetches_retryable_writes_entry_after_committed_snapshot.js
blob: 7d5745bd566db01435475cf289b0f940d1f50557 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
/**
 * Tests that the tenant migration recipient correctly reads 'config.transactions' entries from a
 * donor secondary. During secondary oplog application, updates to the same 'config.transactions'
 * entry are coalesced in a single update of the most recent retryable write statement. If the
 * majority committed snapshot of a secondary exists in the middle of a completed batch, then a
 * recipient's majority read on 'config.transactions' can miss committed retryable writes at that
 * majority commit point.
 *
 * @tags: [
 *   incompatible_with_macos,
 *   incompatible_with_windows_tls,
 *   requires_majority_read_concern,
 *   requires_persistence,
 *   serverless,
 * ]
 */

(function() {
load("jstests/replsets/libs/tenant_migration_test.js");
load("jstests/replsets/libs/tenant_migration_util.js");
load("jstests/libs/fail_point_util.js");  // For configureFailPoint().
load("jstests/libs/uuid_util.js");        // For extractUUIDFromObject().
load("jstests/libs/write_concern_util.js");

// Returns the currentOp document for the single in-progress tenant-recipient
// migration on 'conn', after verifying it matches the expected 'migrationId'.
const getRecipientCurrOp = function(conn, migrationId) {
    const {inprog} = conn.adminCommand({currentOp: true, desc: "tenant recipient migration"});
    assert.eq(inprog.length, 1);
    const [op] = inprog;
    assert.eq(bsonWoCompare(op.instanceID, migrationId), 0);
    return op;
};

// Host string of the donor node the recipient selected as its sync source.
const getDonorSyncSource = function(conn, migrationId) {
    return getRecipientCurrOp(conn, migrationId).donorSyncSource;
};

// OpTime at which the recipient began fetching oplog entries from the donor.
const getStartFetchingDonorOpTime = function(conn, migrationId) {
    return getRecipientCurrOp(conn, migrationId).startFetchingDonorOpTime;
};

// Size the retryable write so that secondary1 can apply every statement in a single
// oplog application batch (replBatchLimitOperations on n1 is set to this value).
const oplogApplierBatchSize = 50;

const donorRst = new ReplSetTest({
    nodes: {
        n0: {},
        // Set the 'syncdelay' to 1s to speed up checkpointing. Also explicitly set the batch
        // size for oplog application to ensure the number of retryable write statements being
        // made majority committed isn't a multiple of it.
        n1: {syncdelay: 1, setParameter: {replBatchLimitOperations: oplogApplierBatchSize}},
        // Set the bgSyncOplogFetcherBatchSize to 1 oplog entry to guarantee replication
        // progress with the stopReplProducerOnDocument failpoint.
        n2: {rsConfig: {priority: 0, hidden: true}, setParameter: {bgSyncOplogFetcherBatchSize: 1}},
        n3: {rsConfig: {priority: 0, hidden: true}, setParameter: {bgSyncOplogFetcherBatchSize: 1}},
        n4: {rsConfig: {priority: 0, hidden: true}, setParameter: {bgSyncOplogFetcherBatchSize: 1}},
    },
    // Force secondaries to sync from the primary to guarantee replication progress with the
    // stopReplProducerOnDocument failpoint. Also disable primary catchup because some replicated
    // retryable write statements are intentionally not being made majority committed.
    settings: {chainingAllowed: false, catchUpTimeoutMillis: 0},
    nodeOptions: Object.assign(TenantMigrationUtil.makeX509OptionsForTest().donor, {
        setParameter: {
            tenantMigrationExcludeDonorHostTimeoutMS: 30 * 1000,
            // Allow non-timestamped reads on donor after migration completes for testing.
            'failpoint.tenantMigrationDonorAllowsNonTimestampedReads': tojson({mode: 'alwaysOn'}),
        }
    }),
});
donorRst.startSet();
donorRst.initiateWithHighElectionTimeout();
const donorPrimary = donorRst.getPrimary();

const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), donorRst: donorRst});

// Shard merge only supports the 'primary' read preference, while this test requires
// the recipient to sync from a donor secondary; bail out early in that configuration.
if (TenantMigrationUtil.isShardMergeEnabled(donorPrimary.getDB("admin"))) {
    jsTestLog(
        "Skip: incompatible with featureFlagShardMerge. Only 'primary' read preference is supported.");
    donorRst.stopSet();
    tenantMigrationTest.stop();
    return;
}

const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
const kTenantId = "testTenantId";
const migrationId = UUID();
const kDbName = tenantMigrationTest.tenantDB(kTenantId, "testDB");
const kCollName = "retryable_write_secondary_oplog_application";
const kNs = `${kDbName}.${kCollName}`;

const migrationOpts = {
    migrationIdString: extractUUIDFromObject(migrationId),
    tenantId: kTenantId,
    // The recipient needs to choose a donor secondary as sync source.
    readPreference: {mode: 'secondary'},
};

// Hang the recipient right after it connects to its donor sync source, so we can
// control the donor's replication state before oplog fetching begins.
const fpAfterConnectingTenantMigrationRecipientInstance = configureFailPoint(
    recipientPrimary, "fpAfterConnectingTenantMigrationRecipientInstance", {action: "hang"});

const fpBeforeWaitingForRetryableWritePreFetchMajorityCommitted = configureFailPoint(
    recipientPrimary, "fpBeforeWaitingForRetryableWritePreFetchMajorityCommitted");

// Start tenant migration and hang after recipient connects to donor sync source.
jsTestLog(`Starting tenant migration: ${tojson(migrationOpts)}`);
assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
fpAfterConnectingTenantMigrationRecipientInstance.wait();

// Recipient should connect to secondary1 as other secondaries are hidden.
const [secondary1, secondary2, secondary3, secondary4] = donorRst.getSecondaries();
const syncSourceSecondaryHost = getDonorSyncSource(recipientPrimary, migrationId);
assert.eq(syncSourceSecondaryHost, secondary1.host);

// Seed the document; w:5 waits for all five donor nodes before replication is paused below.
assert.commandWorked(
    donorPrimary.getCollection(kNs).insert({_id: 0, counter: 0}, {writeConcern: {w: 5}}));

// The default WC is majority and the donor replica set can't satisfy majority writes after we
// stop replication on the secondaries.
assert.commandWorked(donorPrimary.adminCommand(
    {setDefaultRWConcern: 1, defaultWriteConcern: {w: 1}, writeConcern: {w: "majority"}}));
donorRst.awaitReplication();

// Disable replication on all of the secondaries to manually control the replication progress.
const stopReplProducerFailpoints = [secondary1, secondary2, secondary3, secondary4].map(
    conn => configureFailPoint(conn, 'stopReplProducer'));

// While replication is still entirely disabled, additionally disable replication partway
// into the retryable write on other secondaries. The idea is that while secondary1 will
// apply all of the oplog entries in a single batch, other secondaries will only apply up
// to counterMajorityCommitted oplog entries.
const counterTotal = oplogApplierBatchSize;
// Pick a majority-commit point strictly inside secondary1's single application batch
// (counterTotal - 2), per the scenario described in the file header comment.
const counterMajorityCommitted = counterTotal - 2;
jsTestLog(`counterTotal: ${counterTotal}, counterMajorityCommitted: ${counterMajorityCommitted}`);
const stopReplProducerOnDocumentFailpoints = [secondary2, secondary3, secondary4].map(
    conn => configureFailPoint(conn,
                               'stopReplProducerOnDocument',
                               {document: {"diff.u.counter": counterMajorityCommitted + 1}}));

// Perform all the retryable write statements on donor primary.
// This session id is shared by the post-migration retry at the bottom of the test.
const lsid = ({id: UUID()});
assert.commandWorked(donorPrimary.getCollection(kNs).runCommand("update", {
    updates: Array.from({length: counterTotal}, () => ({q: {_id: 0}, u: {$inc: {counter: 1}}})),
    lsid,
    txnNumber: NumberLong(1),
}));

// Get the majority committed and last oplog entry of the respective retryable write statements.
const stmtTotal =
    donorPrimary.getCollection("local.oplog.rs").findOne({"o.diff.u.counter": counterTotal});
const stmtMajorityCommitted = donorPrimary.getCollection("local.oplog.rs").findOne({
    "o.diff.u.counter": counterMajorityCommitted
});

assert.neq(null, stmtTotal);
assert.neq(null, stmtMajorityCommitted);
jsTestLog(`stmtTotal timestamp: ${tojson(stmtTotal.ts)}`);
jsTestLog(`stmtMajorityCommitted timestamp: ${tojson(stmtMajorityCommitted.ts)}`);

for (const fp of stopReplProducerFailpoints) {
    fp.off();
    // Wait for secondary1 to have applied through the counterTotal retryable write statement and
    // other secondaries applied through the counterMajorityCommitted retryable write statement,
    // to guarantee that secondary1 will advance its stable_timestamp when learning of the other
    // secondaries also having applied through counterMajorityCommitted.
    assert.soon(() => {
        const {optimes: {appliedOpTime, durableOpTime}} =
            assert.commandWorked(fp.conn.adminCommand({replSetGetStatus: 1}));

        print(`${fp.conn.host}: ${tojsononeline({
            appliedOpTime,
            durableOpTime,
            stmtMajorityCommittedTimestamp: stmtMajorityCommitted.ts
        })}`);

        // secondary1 must reach the final statement; the others only the majority-committed one.
        const stmtTarget = (fp.conn.host === secondary1.host) ? stmtTotal : stmtMajorityCommitted;

        return bsonWoCompare(appliedOpTime.ts, stmtTarget.ts) >= 0 &&
            bsonWoCompare(durableOpTime.ts, stmtTarget.ts) >= 0;
    });
}

// Wait for secondary1 to have advanced its stable timestamp, and therefore updated the
// committed snapshot.
assert.soon(() => {
    const {lastStableRecoveryTimestamp} =
        assert.commandWorked(secondary1.adminCommand({replSetGetStatus: 1}));

    print(`${secondary1.host}: ${tojsononeline({
        lastStableRecoveryTimestamp,
        stmtMajorityCommittedTimestamp: stmtMajorityCommitted.ts
    })}`);

    return bsonWoCompare(lastStableRecoveryTimestamp, stmtMajorityCommitted.ts) >= 0;
});

// Wait before tenant migration starts to wait for the retryable write pre-fetch result to be
// majority committed.
fpAfterConnectingTenantMigrationRecipientInstance.off();
fpBeforeWaitingForRetryableWritePreFetchMajorityCommitted.wait();

// The recipient's fetch start point should equal the sync source's majority commit
// point, i.e. the timestamp of the counterMajorityCommitted statement.
const startFetchingDonorOpTime = getStartFetchingDonorOpTime(recipientPrimary, migrationId);
assert.eq(startFetchingDonorOpTime.ts, stmtMajorityCommitted.ts);

// At this point, the recipient should have fetched retryable writes and put them into the
// oplog buffer.
const kOplogBufferNS = `config.repl.migration.oplog_${migrationOpts.migrationIdString}`;
const recipientOplogBuffer = recipientPrimary.getCollection(kOplogBufferNS);
jsTestLog(`oplog buffer ns: ${kOplogBufferNS}`);

// Number of entries fetched into oplog buffer is the majority committed count - 1 since we only
// fetch entries that occur before startFetchingDonorOpTime, which is equal to the commit point.
const findRes = recipientOplogBuffer.find().toArray();
const expectedCount = counterMajorityCommitted - 1;
assert.eq(
    findRes.length, expectedCount, `Incorrect number of oplog buffer entries: ${tojson(findRes)}`);

// Resume replication on all the secondaries and wait for migration to complete.
for (const fp of stopReplProducerOnDocumentFailpoints) {
    fp.off();
}

fpBeforeWaitingForRetryableWritePreFetchMajorityCommitted.off();

TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));

// After migration, verify that if we perform the same retryable write statements on the recipient,
// these statements will not be executed.
let docAfterMigration = recipientPrimary.getCollection(kNs).findOne({_id: 0});
assert.eq(docAfterMigration.counter, counterTotal);

// Retry with the same lsid/txnNumber: the recipient must recognize the statements as
// already executed and treat the command as a no-op.
assert.commandWorked(recipientPrimary.getCollection(kNs).runCommand("update", {
    updates: Array.from({length: counterTotal}, () => ({q: {_id: 0}, u: {$inc: {counter: 1}}})),
    lsid,
    txnNumber: NumberLong(1),
}));

// The second query should return the same result as first one, since the recipient should have
// fetched the executed retryable writes from donor.
docAfterMigration = recipientPrimary.getCollection(kNs).findOne({_id: 0});
assert.eq(docAfterMigration.counter, counterTotal);

donorRst.stopSet();
tenantMigrationTest.stop();
})();