summaryrefslogtreecommitdiff
path: root/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js
blob: 13eef15e0a9d5ca69a21dc51c37de71b86f89c93 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/**
 * Tests that in tenant migration, the recipient set can resume collection cloning from the last
 * document cloned after a failover.
 * @tags: [
 *   incompatible_with_macos,
 *   incompatible_with_shard_merge,
 *   incompatible_with_windows_tls,
 *   requires_majority_read_concern,
 *   requires_persistence,
 *   serverless,
 * ]
 */

(function() {
"use strict";

/**
 * Runs a single tenant migration in which the recipient primary is paused part-way through
 * cloning one collection, a different recipient node is stepped up, and the migration is then
 * required to commit with every donor document present on the new recipient primary.
 *
 * @param {boolean} isTimeSeries - when true, the cloner is observed (and the fail point scoped)
 *     on "system.buckets.<collName>" rather than on the collection itself.
 * @param {function} createCollFn - invoked as createCollFn(donorDB, collName); the value it
 *     returns is checked with assert.commandWorked.
 * @param {Array<Object>} docs - documents inserted on the donor. They must already be in
 *     ascending _id order, because the final check compares them with a find sorted by _id.
 */
const tenantMigrationFailoverTest = function(isTimeSeries, createCollFn, docs) {
    load("jstests/libs/fail_point_util.js");
    load("jstests/libs/uuid_util.js");  // supplies 'extractUUIDFromObject'
    load("jstests/replsets/libs/tenant_migration_test.js");
    load("jstests/replsets/libs/tenant_migration_util.js");

    const batchSize = 2;

    // Build the recipient node options on top of the x509 options used for tenant migrations.
    const recipientSetName = jsTestName() + "_recipient";
    const recipientNodeOptions = TenantMigrationUtil.makeX509OptionsForTest().recipient;
    Object.assign(recipientNodeOptions, {
        setParameter: {
            // A small batch size forces the collection cloner to need several batches, which
            // leaves room to interrupt it between batches.
            collectionClonerBatchSize: batchSize,
            // Lets the test read from the recipient while the migration is still in progress.
            'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}),
        }
    });
    const recipientRst =
        new ReplSetTest({nodes: 2, name: recipientSetName, nodeOptions: recipientNodeOptions});

    recipientRst.startSet();
    recipientRst.initiate();

    const migrationFixture = new TenantMigrationTest({name: jsTestName(), recipientRst});
    const donorPrimaryConn = migrationFixture.getDonorPrimary();

    const tenantId = "testTenantId";
    const dbName = migrationFixture.tenantDB(tenantId, "testDB");
    const donorDB = donorPrimaryConn.getDB(dbName);
    const collName = "testColl";

    const originalRecipientPrimary = migrationFixture.getRecipientPrimary();

    // Seed the donor with the collection under test and its documents.
    assert.commandWorked(createCollFn(donorDB, collName));
    migrationFixture.insertDonorDB(dbName, collName, docs);

    const migrationIdString = extractUUIDFromObject(UUID());
    const migrationOpts = {
        migrationIdString,
        recipientConnString: migrationFixture.getRecipientConnString(),
        tenantId,
    };

    // Arm a fail point so the recipient primary pauses once it has handled the first batch
    // response for the collection being cloned.
    const recipientDb = originalRecipientPrimary.getDB(dbName);
    const clonedCollName = isTimeSeries ? "system.buckets." + collName : collName;
    const clonerTargetColl = recipientDb.getCollection(clonedCollName);

    const clonerHangFp =
        configureFailPoint(recipientDb,
                           "tenantMigrationHangCollectionClonerAfterHandlingBatchResponse",
                           {nss: clonerTargetColl.getFullName()});

    // Kick off the migration and wait until exactly one batch has landed on the recipient.
    assert.commandWorked(migrationFixture.startMigration(migrationOpts));
    clonerHangFp.wait();
    assert.soon(() => {
        return clonerTargetColl.find().itcount() === batchSize;
    });

    // Write one more donor document and wait for it to appear in the recipient's migration oplog
    // buffer. This exercises oplog fetcher resumption: after the failover the fetcher must carry
    // on from where it stopped, and the system is expected to crash if it fetches a duplicate
    // oplog entry.
    const fetchedDocId = "docToBeFetched";
    const oplogBufferName = "repl.migration.oplog_" + migrationIdString;
    migrationFixture.insertDonorDB(dbName, "aNewColl", [{_id: fetchedDocId}]);
    assert.soon(() => {
        const bufferedEntries = originalRecipientPrimary.getDB("config")
                                    .getCollection(oplogBufferName)
                                    .find({"entry.o._id": fetchedDocId});
        return bufferedEntries.count() === 1;
    });

    // Fail over to the other recipient node, then release the old primary's fail point. The new
    // primary is expected to pick the clone up from the first document not yet copied.
    const steppedUpNode = recipientRst.getSecondaries()[0];
    recipientRst.stepUp(steppedUpNode);
    clonerHangFp.off();
    recipientRst.getPrimary();

    // Despite the recipient failover, the migration must still commit.
    const migrationResult = migrationFixture.waitForMigrationToComplete(migrationOpts);
    TenantMigrationTest.assertCommitted(migrationResult);

    // Every donor document must now be readable from the new recipient primary.
    const finalColl = steppedUpNode.getDB(dbName).getCollection(collName);
    const clonedCount = finalColl.find().itcount();
    assert.eq(docs.length, clonedCount);
    const clonedDocs = finalColl.find().sort({_id: 1}).toArray();
    assert.docEq(clonedDocs, docs);
    TenantMigrationUtil.checkTenantDBHashes({
        donorRst: migrationFixture.getDonorRst(),
        recipientRst: migrationFixture.getRecipientRst(),
        tenantId
    });

    migrationFixture.stop();
    recipientRst.stopSet();
};

// Case 1: time-series collection.
jsTestLog("Running tenant migration test for time-series collection");
const createTimeSeriesColl = (db, collName) =>
    db.createCollection(collName, {timeseries: {timeField: "time", metaField: "bucket"}});
// Each document gets a distinct meta value so that it lands in its own bucket; this keeps the
// document count aligned with the collectionClonerBatchSize configured on the recipient set.
const timeSeriesDocs =
    ["a", "b", "c", "d"].map((bucket, idx) => ({_id: idx + 1, time: ISODate(), bucket}));
tenantMigrationFailoverTest(true, createTimeSeriesColl, timeSeriesDocs);

// Case 2: regular collection whose _id values span several BSON types.
jsTestLog("Running tenant migration test for regular collection");
const createRegularColl = (db, collName) => db.createCollection(collName);
const regularDocs = [0, "string", UUID(), new Date()].map((_id) => ({_id}));
tenantMigrationFailoverTest(false, createRegularColl, regularDocs);
})();