summaryrefslogtreecommitdiff
path: root/jstests/replsets/tenant_migration_resume_oplog_application.js
blob: da857c9cd70d64543a72a79ccdf95482cbb6d911 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/**
 * Tests that in a tenant migration, the recipient primary will resume oplog application on
 * failover.
 * @tags: [
 *   incompatible_with_eft,
 *   incompatible_with_macos,
 *   incompatible_with_windows_tls,
 *   requires_majority_read_concern,
 *   requires_persistence,
 * ]
 */

(function() {
"use strict";

load("jstests/libs/fail_point_util.js");
load("jstests/libs/uuid_util.js");            // for 'extractUUIDFromObject'
load("jstests/libs/parallelTester.js");       // for 'Thread'
load("jstests/libs/write_concern_util.js");   // for 'stopReplicationOnSecondaries'
load("jstests/aggregation/extras/utils.js");  // For assertArrayEq.
load("jstests/replsets/libs/tenant_migration_test.js");
load("jstests/replsets/libs/tenant_migration_util.js");

// The recipient replica set is built here and handed to TenantMigrationTest below so that the
// 'tenantApplierBatchSizeOps' server parameter can be set on its nodes.
const recipientRst = new ReplSetTest({
    nodes: 3,
    name: jsTestName() + "_recipient",
    // Use a batch size of 2 so that we can hang in the middle of tenant oplog application.
    nodeOptions: Object.assign(TenantMigrationUtil.makeX509OptionsForTest().recipient,
                               {setParameter: {tenantApplierBatchSizeOps: 2}})
});

recipientRst.startSet();
recipientRst.initiate();

const tenantMigrationTest =
    new TenantMigrationTest({name: jsTestName(), recipientRst: recipientRst});

const tenantId = "testTenantId";
const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
const collName = "testColl";

const donorPrimary = tenantMigrationTest.getDonorPrimary();
const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
const donorRst = tenantMigrationTest.getDonorRst();
const donorTestColl = donorPrimary.getDB(dbName).getCollection(collName);

// Populate the donor replica set with some initial data and make sure it is majority committed.
const majorityCommittedDocs = [{_id: 0, x: 0}, {_id: 1, x: 1}];
assert.commandWorked(donorTestColl.insert(majorityCommittedDocs, {writeConcern: {w: "majority"}}));
assert.eq(2, donorTestColl.find().readConcern("majority").itcount());

const migrationId = UUID();
const migrationOpts = {
    migrationIdString: extractUUIDFromObject(migrationId),
    recipientConnString: tenantMigrationTest.getRecipientConnString(),
    tenantId: tenantId,
};

/**
 * Returns an array of the no-op ('op: "n"') oplog entries in 'local.oplog.rs' on 'node' whose
 * 'fromTenantMigration' field equals this test's migration id.
 *
 * The result is materialized once so that callers check the length and the contents of the same
 * snapshot instead of issuing a separate count against the server.
 */
function getAppliedNoOps(node) {
    return node.getDB("local")
        .oplog.rs.find({fromTenantMigration: migrationId, op: "n"})
        .toArray();
}

// Configure fail point to have the recipient primary hang after the cloner completes and the oplog
// applier has started.
const waitAfterDatabaseClone = configureFailPoint(
    recipientPrimary, "fpAfterStartingOplogApplierMigrationRecipientInstance", {action: "hang"});
// Configure fail point to hang the tenant oplog applier after it applies the first batch.
const waitInOplogApplier = configureFailPoint(recipientPrimary, "hangInTenantOplogApplication");

// Start a migration on a separate thread and wait for the recipient to hang on the
// 'fpAfterStartingOplogApplierMigrationRecipientInstance' fail point configured above.
const donorRstArgs = TenantMigrationUtil.createRstArgs(donorRst);
const migrationThread =
    new Thread(TenantMigrationUtil.runMigrationAsync, migrationOpts, donorRstArgs);
migrationThread.start();
waitAfterDatabaseClone.wait();

// Insert some writes that will eventually be picked up by the tenant oplog applier on the
// recipient.
const docsToApply = [{_id: 2, x: 2}, {_id: 3, x: 3}, {_id: 4, x: 4}];
tenantMigrationTest.insertDonorDB(dbName, collName, docsToApply);

// Wait for the applied oplog batch to be replicated.
waitInOplogApplier.wait();
recipientRst.awaitReplication();
const firstBatchNoOps = getAppliedNoOps(recipientPrimary);
// It is possible that the first batch applied includes a resume no-op token. We do not write no-op
// entries for resume token entries in tenant migrations. The batch size is 2, so the first batch
// yields either one or two no-op entries.
assert.gt(firstBatchNoOps.length, 0, firstBatchNoOps);
assert.lte(firstBatchNoOps.length, 2, firstBatchNoOps);
assert.eq(docsToApply[0], firstBatchNoOps[0].o2.o, firstBatchNoOps);
if (firstBatchNoOps.length === 2) {
    assert.eq(docsToApply[1], firstBatchNoOps[1].o2.o, firstBatchNoOps);
}

// Step up a new node in the recipient set and trigger a failover. The new primary should resume
// fetching starting from the unapplied documents.
const newRecipientPrimary = recipientRst.getSecondaries()[0];
assert.commandWorked(newRecipientPrimary.adminCommand({replSetStepUp: 1}));
// Release both fail points on the old primary only after the step-up has been requested.
waitAfterDatabaseClone.off();
waitInOplogApplier.off();
// The return value is intentionally unused.
// NOTE(review): presumably this call waits until the set reports a primary -- confirm.
recipientRst.getPrimary();

// The migration should go through after recipient failover.
TenantMigrationTest.assertCommitted(migrationThread.returnData());
// Validate that the last no-op entry is applied: all three donor writes must now have a
// corresponding no-op entry on the new recipient primary.
const allNoOps = getAppliedNoOps(newRecipientPrimary);
assert.eq(3, allNoOps.length, allNoOps);
assert.eq(docsToApply[2], allNoOps[2].o2.o, allNoOps);

TenantMigrationUtil.checkTenantDBHashes(
    tenantMigrationTest.getDonorRst(), tenantMigrationTest.getRecipientRst(), tenantId);
tenantMigrationTest.stop();
recipientRst.stopSet();
})();