/**
 * Tests that in tenant migration, the recipient set can resume collection cloning from the last
 * document cloned after a failover.
 * @tags: [
 *   incompatible_with_eft,
 *   incompatible_with_macos,
 *   incompatible_with_shard_merge,
 *   incompatible_with_windows_tls,
 *   requires_majority_read_concern,
 *   requires_persistence,
 * ]
 */
(function() {
"use strict";

load("jstests/libs/fail_point_util.js");
load("jstests/libs/uuid_util.js");  // for 'extractUUIDFromObject'
load("jstests/replsets/libs/tenant_migration_test.js");
load("jstests/replsets/libs/tenant_migration_util.js");

const tenantMigrationFailoverTest = function(isTimeSeries, createCollFn, docs) {
    const batchSize = 2;
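    // A two-node recipient set provides a secondary that can be stepped up mid-migration to
    // force the failover under test.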
    const recipientRst = new ReplSetTest({
        nodes: 2,
        name: jsTestName() + "_recipient",
        nodeOptions: Object.assign(TenantMigrationUtil.makeX509OptionsForTest().recipient, {
            setParameter: {
                // Use a batch size of 2 so that the collection cloner requires more than a
                // single batch to complete.
                collectionClonerBatchSize: batchSize,
                // Allow reads on the recipient before the migration completes, for testing.
                'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}),
            }
        })
    });

    recipientRst.startSet();
    recipientRst.initiate();
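
    // The TenantMigrationTest fixture starts the donor replica set and pairs it with the
    // recipient set configured above.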
    const tenantMigrationTest =
        new TenantMigrationTest({name: jsTestName(), recipientRst: recipientRst});

    const donorPrimary = tenantMigrationTest.getDonorPrimary();
    const tenantId = "testTenantId";
    const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
    const donorDB = donorPrimary.getDB(dbName);
    const collName = "testColl";
    const recipientPrimary = tenantMigrationTest.getRecipientPrimary();

    // Create collection and insert documents.
    assert.commandWorked(createCollFn(donorDB, collName));
    tenantMigrationTest.insertDonorDB(dbName, collName, docs);
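
    // Options identifying the migration; also passed to waitForMigrationToComplete() below.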
    const migrationId = UUID();
    const migrationIdString = extractUUIDFromObject(migrationId);
    const migrationOpts = {
        migrationIdString: migrationIdString,
        recipientConnString: tenantMigrationTest.getRecipientConnString(),
        tenantId: tenantId,
    };

    // Configure a fail point to have the recipient primary hang after cloning 2 documents.
    const recipientDb = recipientPrimary.getDB(dbName);
    let recipientColl = isTimeSeries ? recipientDb.getCollection("system.buckets." + collName)
                                     : recipientDb.getCollection(collName);
    const hangDuringCollectionClone =
        configureFailPoint(recipientDb,
                           "tenantMigrationHangCollectionClonerAfterHandlingBatchResponse",
                           {nss: recipientColl.getFullName()});

    // Start a migration and wait for the recipient to hang after cloning 2 documents.
    assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
    hangDuringCollectionClone.wait();
    assert.soon(() => recipientColl.find().itcount() === batchSize);

    // Insert a document that will be fetched by the recipient. This is to test that on failover,
    // the fetcher will resume fetching from where it left off. The system is expected to crash
    // if the recipient fetches a duplicate oplog entry upon resuming the migration.
    tenantMigrationTest.insertDonorDB(dbName, "aNewColl", [{_id: "docToBeFetched"}]);
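
    // Wait until the new document's oplog entry has been fetched into the recipient's oplog
    // buffer collection, so a fetcher that resumes from the wrong point after failover would
    // re-fetch it as a duplicate.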
    assert.soon(() => {
        const configDb = recipientPrimary.getDB("config");
        const oplogBuffer = configDb.getCollection("repl.migration.oplog_" + migrationIdString);
        return oplogBuffer.find({"entry.o._id": "docToBeFetched"}).count() === 1;
    });

    // Step up a new node in the recipient set and trigger a failover. The new primary should
    // resume cloning starting from the third document.
    const newRecipientPrimary = recipientRst.getSecondaries()[0];
    recipientRst.awaitLastOpCommitted();
    assert.commandWorked(newRecipientPrimary.adminCommand({replSetStepUp: 1}));
    hangDuringCollectionClone.off();
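    // getPrimary() blocks until a primary is available, ensuring the election has completed
    // before we wait on the migration outcome.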
    recipientRst.getPrimary();

    // The migration should go through after the recipient failover.
    TenantMigrationTest.assertCommitted(
        tenantMigrationTest.waitForMigrationToComplete(migrationOpts));

    // Check that the recipient has cloned all documents in the collection.
    recipientColl = newRecipientPrimary.getDB(dbName).getCollection(collName);
    assert.eq(docs.length, recipientColl.find().itcount());
    assert.docEq(recipientColl.find().sort({_id: 1}).toArray(), docs);
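
    // Verify via dbhash comparison that the tenant's data on the donor and recipient match.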
    TenantMigrationUtil.checkTenantDBHashes(
        tenantMigrationTest.getDonorRst(), tenantMigrationTest.getRecipientRst(), tenantId);

    tenantMigrationTest.stop();
    recipientRst.stopSet();
};
jsTestLog("Running tenant migration test for time-series collection");
tenantMigrationFailoverTest(true,
(db, collName) => db.createCollection(
collName, {timeseries: {timeField: "time", metaField: "bucket"}}),
[
// Group each document in its own bucket in order to work with the
// collectionClonerBatchSize we set at the recipient replSet.
{_id: 1, time: ISODate(), bucket: "a"},
{_id: 2, time: ISODate(), bucket: "b"},
{_id: 3, time: ISODate(), bucket: "c"},
{_id: 4, time: ISODate(), bucket: "d"}
]);
jsTestLog("Running tenant migration test for regular collection");
tenantMigrationFailoverTest(false,
(db, collName) => db.createCollection(collName),
[{_id: 0}, {_id: "string"}, {_id: UUID()}, {_id: new Date()}]);
})();