/**
 * Tests that in a tenant migration, the recipient primary will resume oplog application on
 * failover.
 * @tags: [
 * incompatible_with_eft,
 * incompatible_with_macos,
 * incompatible_with_shard_merge,
 * incompatible_with_windows_tls,
 * requires_majority_read_concern,
 * requires_persistence,
 * ]
 */
(function() {
"use strict";
load("jstests/libs/fail_point_util.js"); // for 'configureFailPoint'
load("jstests/libs/uuid_util.js"); // for 'extractUUIDFromObject'
load("jstests/libs/parallelTester.js"); // for 'Thread'
load("jstests/libs/write_concern_util.js"); // for 'stopReplicationOnSecondaries'
load("jstests/aggregation/extras/utils.js"); // For assertArrayEq.
load("jstests/replsets/libs/tenant_migration_test.js"); // for 'TenantMigrationTest'
load("jstests/replsets/libs/tenant_migration_util.js"); // for 'TenantMigrationUtil'
// Use a 3-node recipient set so a secondary is available to step up when we force a failover
// partway through tenant oplog application.
const recipientRst = new ReplSetTest({
nodes: 3,
name: jsTestName() + "_recipient",
// Use a batch size of 2 so that we can hang in the middle of tenant oplog application.
nodeOptions: Object.assign(TenantMigrationUtil.makeX509OptionsForTest().recipient,
{setParameter: {tenantApplierBatchSizeOps: 2}})
});
recipientRst.startSet();
recipientRst.initiate();
// Hand the pre-built recipient set to the test fixture; the fixture creates the donor set itself.
const tenantMigrationTest =
new TenantMigrationTest({name: jsTestName(), recipientRst: recipientRst});
const tenantId = "testTenantId";
const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
const collName = "testColl";
const donorPrimary = tenantMigrationTest.getDonorPrimary();
const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
const donorRst = tenantMigrationTest.getDonorRst();
const donorTestColl = donorPrimary.getDB(dbName).getCollection(collName);
// Populate the donor replica set with some initial data and make sure it is majority committed.
// These docs are copied by the recipient's cloner, not by the oplog applier.
const majorityCommittedDocs = [{_id: 0, x: 0}, {_id: 1, x: 1}];
assert.commandWorked(donorTestColl.insert(majorityCommittedDocs, {writeConcern: {w: "majority"}}));
assert.eq(2, donorTestColl.find().readConcern("majority").itcount());
const migrationId = UUID();
const migrationOpts = {
migrationIdString: extractUUIDFromObject(migrationId),
recipientConnString: tenantMigrationTest.getRecipientConnString(),
tenantId: tenantId,
};
// Configure fail point to have the recipient primary hang after the cloner completes and the oplog
// applier has started.
let waitAfterDatabaseClone = configureFailPoint(
recipientPrimary, "fpAfterStartingOplogApplierMigrationRecipientInstance", {action: "hang"});
// Configure fail point to hang the tenant oplog applier after it applies the first batch.
let waitInOplogApplier = configureFailPoint(recipientPrimary, "hangInTenantOplogApplication");
// Start the migration on a separate thread and wait for the recipient primary to hang after
// starting the oplog applier (i.e. cloning is done but no tenant oplog entries are applied yet).
const donorRstArgs = TenantMigrationUtil.createRstArgs(donorRst);
const migrationThread =
new Thread(TenantMigrationUtil.runMigrationAsync, migrationOpts, donorRstArgs);
migrationThread.start();
waitAfterDatabaseClone.wait();
// Insert some writes that will eventually be picked up by the tenant oplog applier on the
// recipient. With tenantApplierBatchSizeOps=2, only a subset of these is applied before the
// applier hangs, leaving unapplied entries for the post-failover primary to resume from.
const docsToApply = [{_id: 2, x: 2}, {_id: 3, x: 3}, {_id: 4, x: 4}];
tenantMigrationTest.insertDonorDB(dbName, collName, docsToApply);
// Wait for the applied oplog batch to be replicated, so the secondary we later step up has it.
waitInOplogApplier.wait();
recipientRst.awaitReplication();
// Tenant oplog application writes no-op entries (op: "n") tagged with the migration id; inspect
// them to see how far the first batch got.
let local = recipientPrimary.getDB("local");
let appliedNoOps = local.oplog.rs.find({fromTenantMigration: migrationId, op: "n"});
let resultsArr = appliedNoOps.toArray();
// It is possible that the first batch applied includes a resume no-op token. We do not write no-op
// entries for resume token entries in tenant migrations.
assert.gt(appliedNoOps.count(), 0, resultsArr);
assert.lte(appliedNoOps.count(), 2, resultsArr);
assert.eq(docsToApply[0], resultsArr[0].o2.o, resultsArr);
if (appliedNoOps.count() === 2) {
assert.eq(docsToApply[1], resultsArr[1].o2.o, resultsArr);
}
// Step up a new node in the recipient set and trigger a failover. The new primary should resume
// fetching starting from the unapplied documents. (The fail points were enabled only on the old
// primary, so the new primary is free to make progress.)
const newRecipientPrimary = recipientRst.getSecondaries()[0];
assert.commandWorked(newRecipientPrimary.adminCommand({replSetStepUp: 1}));
waitAfterDatabaseClone.off();
waitInOplogApplier.off();
recipientRst.getPrimary(); // Wait for the election to complete and a primary to be established.
// The migration should go through after recipient failover.
TenantMigrationTest.assertCommitted(migrationThread.returnData());
// Validate that the last no-op entry is applied. After the migration commits, every one of the
// three inserted docs must have a corresponding no-op application entry.
local = newRecipientPrimary.getDB("local");
appliedNoOps = local.oplog.rs.find({fromTenantMigration: migrationId, op: "n"});
resultsArr = appliedNoOps.toArray();
assert.eq(3, appliedNoOps.count(), appliedNoOps);
assert.eq(docsToApply[2], resultsArr[2].o2.o, resultsArr);
// Final consistency check: tenant data on donor and recipient must hash identically.
TenantMigrationUtil.checkTenantDBHashes(
tenantMigrationTest.getDonorRst(), tenantMigrationTest.getRecipientRst(), tenantId);
tenantMigrationTest.stop();
recipientRst.stopSet();
})();