summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Vesselina Ratcheva <vesselina.ratcheva@10gen.com> 2021-02-02 03:15:44 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-02-09 01:56:05 +0000
commit 6c3cf4a2640c0f08733df41330a8927b09744aaa (patch)
tree 98b1cb9584d8b98dda03bed1027b4ac990e81583
parent 4d72470b050c348ad5fae2cf46c01e09943f5070 (diff)
download mongo-6c3cf4a2640c0f08733df41330a8927b09744aaa.tar.gz
SERVER-53823 Require matching FCV from donor when starting a tenant migration
-rw-r--r-- jstests/replsets/tenant_migration_donor_recipient_fcv_mismatch.js 62
-rw-r--r-- jstests/replsets/tenant_migration_donor_recipient_fcv_mismatch_after_failover.js 70
-rw-r--r-- src/mongo/db/repl/tenant_migration_recipient_service.cpp 39
-rw-r--r-- src/mongo/db/repl/tenant_migration_recipient_service.h 5
-rw-r--r-- src/mongo/db/repl/tenant_migration_recipient_service_test.cpp 62
5 files changed, 238 insertions, 0 deletions
diff --git a/jstests/replsets/tenant_migration_donor_recipient_fcv_mismatch.js b/jstests/replsets/tenant_migration_donor_recipient_fcv_mismatch.js
new file mode 100644
index 00000000000..3696d6e99a6
--- /dev/null
+++ b/jstests/replsets/tenant_migration_donor_recipient_fcv_mismatch.js
@@ -0,0 +1,62 @@
+/**
+ * Tests that starting a migration fails if the donor and recipient do not share the same FCV.
+ * @tags: [requires_majority_read_concern, requires_fcv_49, incompatible_with_windows_tls]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/uuid_util.js"); // for 'extractUUIDFromObject'
+load("jstests/libs/parallelTester.js"); // for 'Thread'
+load("jstests/replsets/libs/tenant_migration_test.js");
+load("jstests/replsets/libs/tenant_migration_util.js");
+
+const tenantMigrationTest = new TenantMigrationTest({name: jsTestName()});
+if (!tenantMigrationTest.isFeatureFlagEnabled()) {
+ jsTestLog("Skipping test because the tenant migrations feature flag is disabled");
+ return;
+}
+
+const tenantId = "testTenantId";
+const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
+const collName = "testColl";
+
+const donorPrimary = tenantMigrationTest.getDonorPrimary();
+const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
+
+tenantMigrationTest.insertDonorDB(dbName, collName);
+
+const migrationId = UUID();
+const migrationOpts = {
+ migrationIdString: extractUUIDFromObject(migrationId),
+ recipientConnString: tenantMigrationTest.getRecipientConnString(),
+ tenantId: tenantId,
+};
+
+// Configure a failpoint to have the recipient primary hang after taking note of its FCV
+// and before comparing it with that of the donor.
+const recipientDB = recipientPrimary.getDB(dbName);
+const hangAfterSavingFCV = configureFailPoint(
+ recipientDB, "fpAfterRecordingRecipientPrimaryStartingFCV", {action: "hang"});
+
+// Start a migration and wait for recipient to hang at the failpoint.
+assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
+hangAfterSavingFCV.wait();
+
+// Downgrade the FCV for the donor set and resume migration.
+assert.commandWorked(
+ donorPrimary.adminCommand({setFeatureCompatibilityVersion: lastContinuousFCV}));
+hangAfterSavingFCV.off();
+
+// Make sure we see the FCV mismatch detection message on the recipient.
+checkLog.containsJson(recipientPrimary, 5382300);
+
+// Upgrade again to check on the status of the migration from the donor's point of view.
+assert.commandWorked(donorPrimary.adminCommand({setFeatureCompatibilityVersion: latestFCV}));
+const stateRes =
+ assert.commandWorked(tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
+assert.eq(stateRes.state, TenantMigrationTest.State.kAborted);
+
+tenantMigrationTest.stop();
+})();
diff --git a/jstests/replsets/tenant_migration_donor_recipient_fcv_mismatch_after_failover.js b/jstests/replsets/tenant_migration_donor_recipient_fcv_mismatch_after_failover.js
new file mode 100644
index 00000000000..89e6a4bc387
--- /dev/null
+++ b/jstests/replsets/tenant_migration_donor_recipient_fcv_mismatch_after_failover.js
@@ -0,0 +1,70 @@
+/**
+ * Tests that restarting a migration attempt after a failover fails if the donor and recipient no
+ * longer share the same FCV.
+ * @tags: [requires_majority_read_concern, requires_fcv_49, incompatible_with_windows_tls]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/uuid_util.js"); // for 'extractUUIDFromObject'
+load("jstests/libs/parallelTester.js"); // for 'Thread'
+load("jstests/replsets/libs/tenant_migration_test.js");
+load("jstests/replsets/libs/tenant_migration_util.js");
+
+const tenantMigrationTest = new TenantMigrationTest({name: jsTestName()});
+if (!tenantMigrationTest.isFeatureFlagEnabled()) {
+ jsTestLog("Skipping test because the tenant migrations feature flag is disabled");
+ return;
+}
+
+const tenantId = "testTenantId";
+const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
+const collName = "testColl";
+
+const donorPrimary = tenantMigrationTest.getDonorPrimary();
+const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
+
+tenantMigrationTest.insertDonorDB(dbName, collName);
+
+const migrationId = UUID();
+const migrationOpts = {
+ migrationIdString: extractUUIDFromObject(migrationId),
+ recipientConnString: tenantMigrationTest.getRecipientConnString(),
+ tenantId: tenantId,
+};
+
+// Configure a failpoint to have the recipient primary hang after a successful initial comparison.
+const recipientDB = recipientPrimary.getDB(dbName);
+const hangAfterFirstFCVcheck =
+ configureFailPoint(recipientDB, "fpAfterComparingRecipientAndDonorFCV", {action: "hang"});
+
+// Start a migration and wait for recipient to hang at the failpoint.
+assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
+hangAfterFirstFCVcheck.wait();
+
+// Downgrade the FCV for the donor set.
+assert.commandWorked(
+ donorPrimary.adminCommand({setFeatureCompatibilityVersion: lastContinuousFCV}));
+
+// Step up a new node in the recipient set and trigger a failover. The new primary should attempt to
+// resume cloning, but fail upon re-checking the FCVs.
+const recipientRst = tenantMigrationTest.getRecipientRst();
+const newRecipientPrimary = recipientRst.getSecondaries()[0];
+recipientRst.awaitLastOpCommitted();
+assert.commandWorked(newRecipientPrimary.adminCommand({replSetStepUp: 1}));
+hangAfterFirstFCVcheck.off();
+recipientRst.getPrimary();
+
+// Make sure we see the FCV mismatch detection message on the new recipient primary.
+checkLog.containsJson(newRecipientPrimary, 5382300);
+
+// Upgrade again to check on the status of the migration from the donor's point of view.
+assert.commandWorked(donorPrimary.adminCommand({setFeatureCompatibilityVersion: latestFCV}));
+const stateRes =
+ assert.commandWorked(tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
+assert.eq(stateRes.state, TenantMigrationTest.State.kAborted);
+
+tenantMigrationTest.stop();
+})();
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
index 514e73f7f21..8d569280538 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
@@ -79,6 +79,7 @@ NamespaceString getOplogBufferNs(const UUID& migrationUUID) {
MONGO_FAIL_POINT_DEFINE(pauseBeforeRunTenantMigrationRecipientInstance);
MONGO_FAIL_POINT_DEFINE(pauseAfterRunTenantMigrationRecipientInstance);
MONGO_FAIL_POINT_DEFINE(skipTenantMigrationRecipientAuth);
+MONGO_FAIL_POINT_DEFINE(skipComparingRecipientAndDonorFCV);
MONGO_FAIL_POINT_DEFINE(autoRecipientForgetMigration);
MONGO_FAIL_POINT_DEFINE(pauseAfterCreatingOplogBuffer);
@@ -87,6 +88,7 @@ MONGO_FAIL_POINT_DEFINE(failWhilePersistingTenantMigrationRecipientInstanceState
MONGO_FAIL_POINT_DEFINE(fpAfterPersistingTenantMigrationRecipientInstanceStateDoc);
MONGO_FAIL_POINT_DEFINE(fpAfterConnectingTenantMigrationRecipientInstance);
MONGO_FAIL_POINT_DEFINE(fpAfterRecordingRecipientPrimaryStartingFCV);
+MONGO_FAIL_POINT_DEFINE(fpAfterComparingRecipientAndDonorFCV);
MONGO_FAIL_POINT_DEFINE(fpAfterRetrievingStartOpTimesMigrationRecipientInstance);
MONGO_FAIL_POINT_DEFINE(fpAfterStartingOplogFetcherMigrationRecipientInstance);
MONGO_FAIL_POINT_DEFINE(setTenantMigrationRecipientInstanceHostTimeout);
@@ -1263,6 +1265,39 @@ void TenantMigrationRecipientService::Instance::_fetchAndStoreDonorClusterTimeKe
_scopedExecutor, std::move(keyDocs), token);
}
+void TenantMigrationRecipientService::Instance::_compareRecipientAndDonorFCV() const {
+ if (skipComparingRecipientAndDonorFCV.shouldFail()) { // Test-only.
+ return;
+ }
+
+ auto donorFCVbson =
+ _client->findOne(NamespaceString::kServerConfigurationNamespace.ns(),
+ QUERY("_id" << FeatureCompatibilityVersionParser::kParameterName),
+ nullptr,
+ QueryOption_SecondaryOk,
+ ReadConcernArgs(ReadConcernLevel::kMajorityReadConcern).toBSONInner());
+
+ uassert(5382302, "FCV on donor not set", !donorFCVbson.isEmpty());
+
+ auto swDonorFCV = FeatureCompatibilityVersionParser::parse(donorFCVbson);
+ uassertStatusOK(swDonorFCV.getStatus());
+
+ stdx::lock_guard lk(_mutex);
+ auto donorFCV = swDonorFCV.getValue();
+ auto recipientFCV = _stateDoc.getRecipientPrimaryStartingFCV();
+
+ if (donorFCV != recipientFCV) {
+ LOGV2_ERROR(5382300,
+ "Donor and recipient FCV mismatch",
+ "tenantId"_attr = getTenantId(),
+ "migrationId"_attr = getMigrationUUID(),
+ "donorConnString"_attr = _donorConnectionString,
+ "donorFCV"_attr = donorFCV,
+ "recipientFCV"_attr = recipientFCV);
+ uasserted(5382301, "Mismatch between donor and recipient FCV");
+ }
+}
+
SemiFuture<void> TenantMigrationRecipientService::Instance::run(
std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancelationToken& token) noexcept {
@@ -1352,6 +1387,10 @@ SemiFuture<void> TenantMigrationRecipientService::Instance::run(
})
.then([this, self = shared_from_this()] {
_stopOrHangOnFailPoint(&fpAfterRecordingRecipientPrimaryStartingFCV);
+ _compareRecipientAndDonorFCV();
+ })
+ .then([this, self = shared_from_this()] {
+ _stopOrHangOnFailPoint(&fpAfterComparingRecipientAndDonorFCV);
stdx::lock_guard lk(_mutex);
_getStartOpTimesFromDonor(lk);
return _updateStateDocForMajority(lk);
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.h b/src/mongo/db/repl/tenant_migration_recipient_service.h
index 21e4325b020..2fec9e6bf55 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service.h
+++ b/src/mongo/db/repl/tenant_migration_recipient_service.h
@@ -419,7 +419,12 @@ public:
/*
* Returns the majority OpTime on the donor node that 'client' is connected to.
*/
+
OpTime _getDonorMajorityOpTime(std::unique_ptr<mongo::DBClientConnection>& client);
+ /**
+ * Enforces that the donor and recipient share the same featureCompatibilityVersion.
+ */
+ void _compareRecipientAndDonorFCV() const;
mutable Mutex _mutex = MONGO_MAKE_LATCH("TenantMigrationRecipientService::_mutex");
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service_test.cpp b/src/mongo/db/repl/tenant_migration_recipient_service_test.cpp
index f25250f2045..cae192be65a 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service_test.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_service_test.cpp
@@ -38,6 +38,7 @@
#include "mongo/client/replica_set_monitor_protocol_test_util.h"
#include "mongo/config.h"
#include "mongo/db/client.h"
+#include "mongo/db/commands/feature_compatibility_version_document_gen.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/op_observer_impl.h"
#include "mongo/db/op_observer_registry.h"
@@ -225,6 +226,10 @@ public:
// Set the sslMode to allowSSL to avoid validation error.
sslGlobalParams.sslMode.store(SSLParams::SSLMode_allowSSL);
+ // Skipped unless tested explicitly, as we will not receive an FCV document from the donor
+ // in these unittests without (unsightly) intervention.
+ auto compFp = globalFailPointRegistry().find("skipComparingRecipientAndDonorFCV");
+ compFp->setMode(FailPoint::alwaysOn);
// Timestamps of "0 seconds" are not allowed, so we must advance our clock mock to the first
// real second.
@@ -408,6 +413,18 @@ protected:
initialStateDoc.setStartFetchingDonorOpTime(startFetchingDonorOptime);
}
+ /**
+ * Sets the FCV on the donor so that it can respond to FCV requests appropriately.
+ * (Generic FCV reference): This FCV reference should exist across LTS binary versions.
+ */
+ void setDonorFCV(const TenantMigrationRecipientService::Instance* instance,
+ ServerGlobalParams::FeatureCompatibility::Version version =
+ ServerGlobalParams::FeatureCompatibility::kLatest) {
+ auto fcvDoc = FeatureCompatibilityVersionDocument(version);
+ auto client = getClient(instance);
+ client->insert(NamespaceString::kServerConfigurationNamespace.ns(), fcvDoc.toBSON());
+ }
+
private:
std::shared_ptr<ClockSourceMock> _clkSource = std::make_shared<ClockSourceMock>();
@@ -2797,6 +2814,51 @@ TEST_F(TenantMigrationRecipientServiceTest,
ASSERT_OK(instance->getCompletionFuture().getNoThrow());
}
+TEST_F(TenantMigrationRecipientServiceTest,
+ TenantMigrationRecipientServiceDonorAndRecipientFCVMismatch) {
+ stopFailPointEnableBlock fp("fpAfterComparingRecipientAndDonorFCV");
+
+ // Tests skip this check by default but we are specifically testing it here.
+ auto compFp = globalFailPointRegistry().find("skipComparingRecipientAndDonorFCV");
+ compFp->setMode(FailPoint::off);
+
+ // Hang after connecting so the test can insert the donor's FCV document before the comparison.
+ auto connFp =
+ globalFailPointRegistry().find("fpAfterConnectingTenantMigrationRecipientInstance");
+ auto initialTimesEntered = connFp->setMode(FailPoint::alwaysOn,
+ 0,
+ BSON("action"
+ << "hang"));
+
+ const UUID migrationUUID = UUID::gen();
+ MockReplicaSet replSet("donorSet", 3, true /* hasPrimary */, true /* dollarPrefixHosts */);
+
+ TenantMigrationRecipientDocument initialStateDocument(
+ migrationUUID,
+ replSet.getConnectionString(),
+ "tenantA",
+ ReadPreferenceSetting(ReadPreference::PrimaryOnly));
+ initialStateDocument.setRecipientCertificateForDonor(kRecipientPEMPayload);
+
+ // Create and start the instance.
+ auto opCtx = makeOperationContext();
+ auto instance = TenantMigrationRecipientService::Instance::getOrCreate(
+ opCtx.get(), _service, initialStateDocument.toBSON());
+ ASSERT(instance.get());
+
+ // Once the instance hangs after connecting, set the donor FCV to be different from 'latest'.
+ // (Generic FCV reference): This FCV reference should exist across LTS binary versions.
+ connFp->waitForTimesEntered(initialTimesEntered + 1);
+ setDonorFCV(instance.get(), ServerGlobalParams::FeatureCompatibility::kLastContinuous);
+ connFp->setMode(FailPoint::off);
+
+ // Wait for task completion failure.
+ // The FCVs should differ so we expect to exit with an error.
+ std::int32_t expectedCode = 5382301;
+ ASSERT_EQ(expectedCode, instance->getDataSyncCompletionFuture().getNoThrow().code());
+ ASSERT_OK(instance->getCompletionFuture().getNoThrow());
+}
+
TEST_F(TenantMigrationRecipientServiceTest, WaitUntilTimestampIsMajorityCommitted) {
const UUID migrationUUID = UUID::gen();
const OpTime topOfOplogOpTime(Timestamp(5, 1), 1);