summaryrefslogtreecommitdiff
path: root/jstests/replsets/tenant_migration_donor_initial_sync_recovery.js
blob: 8afc2d2fcf93e335022544766eaad2b9151f334d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
/**
 * Tests that the tenant migration donor's in-memory state is initialized correctly on initial sync.
 * This test randomly selects a point during the migration to add a node to the donor replica set.
 *
 * @tags: [
 *   incompatible_with_macos,
 *   incompatible_with_windows_tls,
 *   requires_fcv_62,
 *   requires_majority_read_concern,
 *   requires_persistence,
 *   serverless,
 * ]
 */

(function() {
"use strict";

load("jstests/libs/fail_point_util.js");
load("jstests/libs/uuid_util.js");
load("jstests/libs/parallelTester.js");
load("jstests/libs/write_concern_util.js");
load("jstests/replsets/libs/tenant_migration_test.js");
const {ServerlessLockType, getServerlessOperationLock} = TenantMigrationUtil;

const tenantMigrationTest = new TenantMigrationTest({name: jsTestName()});

// Upper bound for the random delay inserted between resuming the migration and resuming initial
// sync on the new node.
const kMaxSleepTimeMS = 1000;
const kTenantId = 'testTenantId';

const donorPrimary = tenantMigrationTest.getDonorPrimary();

// Force the migration to pause after entering a randomly selected state.
Random.setRandomSeed();
const kMigrationFpNames = [
    "pauseTenantMigrationBeforeLeavingDataSyncState",
    "pauseTenantMigrationBeforeLeavingBlockingState",
    "abortTenantMigrationBeforeLeavingBlockingState"
];
// 'fp' stays undefined when the random index falls one past the end of the list, in which case no
// extra failpoint is configured and the migration is not paused in any of the states above.
let fp;
const index = Random.randInt(kMigrationFpNames.length + 1);
if (index < kMigrationFpNames.length) {
    fp = configureFailPoint(donorPrimary, kMigrationFpNames[index]);
}

const donorRst = tenantMigrationTest.getDonorRst();
const hangInDonorAfterReplicatingKeys =
    configureFailPoint(donorRst.getPrimary(), "pauseTenantMigrationAfterFetchingAndStoringKeys");
const migrationOpts = {
    migrationIdString: extractUUIDFromObject(UUID()),
    tenantId: kTenantId
};

assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
// We must wait for the migration to have finished replicating the recipient keys on the donor set
// before starting initial sync, otherwise the migration will hang while waiting for initial sync to
// complete. We wait for the keys to be replicated with 'w: all' write concern.
hangInDonorAfterReplicatingKeys.wait();

// Add the initial sync node and make sure that it does not step up. We must add this node before
// sending the first 'recipientSyncData' command to avoid the scenario where a new donor node is
// added in-between 'recipientSyncData' commands to the recipient, prompting a
// 'ConflictingOperationInProgress' error. We do not support reconfigs that add/remove nodes during
// a migration.
const initialSyncNode = donorRst.add({
    rsConfig: {priority: 0, votes: 0},
    setParameter: {"failpoint.initialSyncHangBeforeChoosingSyncSource": tojson({mode: "alwaysOn"})}
});
donorRst.reInitiate();
donorRst.waitForState(initialSyncNode, ReplSetTest.State.STARTUP_2);
// Resume the migration. Wait randomly before resuming initial sync on the new secondary to test
// the various migration states.
hangInDonorAfterReplicatingKeys.off();
sleep(Math.random() * kMaxSleepTimeMS);
if (fp) {
    fp.wait();
}

jsTestLog("Waiting for initial sync to finish: " + initialSyncNode.port);
// Check the command response so that a failure to disable the failpoint is reported here rather
// than as a timeout while waiting for the node to become a secondary.
assert.commandWorked(initialSyncNode.getDB('admin').adminCommand(
    {configureFailPoint: 'initialSyncHangBeforeChoosingSyncSource', mode: "off"}));
donorRst.awaitSecondaryNodes();
donorRst.awaitReplication();

// Stop replication on the node so that the TenantMigrationAccessBlocker cannot transition its state
// past what is reflected in the state doc read below.
stopServerReplication(initialSyncNode);

/**
 * Fetches the 'donor' section of the TenantMigrationAccessBlocker that the initial sync node
 * reports for 'kTenantId'. A fresh lookup is done on every call so it can be polled.
 */
function getDonorAccessBlocker() {
    return tenantMigrationTest
        .getTenantMigrationAccessBlocker({donorNode: initialSyncNode, tenantId: kTenantId})
        .donor;
}

/**
 * Polls until the donor access blocker on the initial sync node reports 'expectedState'.
 */
function assertDonorAccessStateSoon(expectedState) {
    assert.soon(() => getDonorAccessBlocker().state == expectedState);
}

/**
 * Polls until field 'fieldName' of the donor access blocker on the initial sync node compares
 * equal (via bsonWoCompare) to 'expectedValue'.
 */
function assertDonorBlockerFieldSoon(fieldName, expectedValue) {
    assert.soon(() => bsonWoCompare(getDonorAccessBlocker()[fieldName], expectedValue) == 0);
}

const configDonorsColl = initialSyncNode.getCollection(TenantMigrationTest.kConfigDonorsNS);
assert.lte(configDonorsColl.count(), 1);
const donorDoc = configDonorsColl.findOne();
if (donorDoc) {
    jsTestLog("Initial sync completed while migration was in state: " + donorDoc.state);
    // The in-memory access blocker must match whatever state the replicated donor state doc is in.
    switch (donorDoc.state) {
        case TenantMigrationTest.DonorState.kAbortingIndexBuilds:
        case TenantMigrationTest.DonorState.kDataSync:
            assertDonorAccessStateSoon(TenantMigrationTest.DonorAccessState.kAllow);
            break;
        case TenantMigrationTest.DonorState.kBlocking:
            assertDonorAccessStateSoon(TenantMigrationTest.DonorAccessState.kBlockWritesAndReads);
            assertDonorBlockerFieldSoon("blockTimestamp", donorDoc.blockTimestamp);
            break;
        case TenantMigrationTest.DonorState.kCommitted:
            assertDonorAccessStateSoon(TenantMigrationTest.DonorAccessState.kReject);
            assertDonorBlockerFieldSoon("commitOpTime", donorDoc.commitOrAbortOpTime);
            assertDonorBlockerFieldSoon("blockTimestamp", donorDoc.blockTimestamp);
            break;
        case TenantMigrationTest.DonorState.kAborted:
            assertDonorAccessStateSoon(TenantMigrationTest.DonorAccessState.kAborted);
            assertDonorBlockerFieldSoon("abortOpTime", donorDoc.commitOrAbortOpTime);
            assertDonorBlockerFieldSoon("blockTimestamp", donorDoc.blockTimestamp);
            break;
        default:
            // Report the offending state from the doc itself; a bare 'state' identifier is not
            // declared anywhere in this scope and would raise a ReferenceError in strict mode.
            throw new Error(`Invalid state "${donorDoc.state}" from donor doc.`);
    }
}

// The donor serverless lock is expected to be held only while a donor state doc exists and has not
// been marked for expiry ('expireAt' unset).
const activeServerlessLock = getServerlessOperationLock(initialSyncNode);
if (donorDoc && !donorDoc.expireAt) {
    assert.eq(activeServerlessLock, ServerlessLockType.TenantMigrationDonor);
} else {
    assert.eq(activeServerlessLock, ServerlessLockType.None);
}

if (fp) {
    fp.off();
}

restartServerReplication(initialSyncNode);

// When no failpoint was selected, 'kMigrationFpNames[index]' is undefined and the migration is
// expected to commit.
if (kMigrationFpNames[index] === "abortTenantMigrationBeforeLeavingBlockingState") {
    TenantMigrationTest.assertAborted(
        tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
} else {
    TenantMigrationTest.assertCommitted(
        tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
}
assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));
tenantMigrationTest.stop();
})();