summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--jstests/multiVersion/genericSetFCVUsage/tenant_migration_donor_abort_on_fcv_change.js70
-rw-r--r--src/mongo/db/commands/set_feature_compatibility_version_command.cpp20
-rw-r--r--src/mongo/db/repl/primary_only_service.cpp30
-rw-r--r--src/mongo/db/repl/primary_only_service.h5
-rw-r--r--src/mongo/db/repl/tenant_migration_donor_service.cpp16
-rw-r--r--src/mongo/db/repl/tenant_migration_donor_service.h5
6 files changed, 146 insertions, 0 deletions
diff --git a/jstests/multiVersion/genericSetFCVUsage/tenant_migration_donor_abort_on_fcv_change.js b/jstests/multiVersion/genericSetFCVUsage/tenant_migration_donor_abort_on_fcv_change.js
new file mode 100644
index 00000000000..0e958568cc9
--- /dev/null
+++ b/jstests/multiVersion/genericSetFCVUsage/tenant_migration_donor_abort_on_fcv_change.js
@@ -0,0 +1,70 @@
+/**
+ * Tests that the donor cancels all migrations when its FCV changes.
+ * @tags: [requires_majority_read_concern, incompatible_with_windows_tls]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/uuid_util.js"); // for 'extractUUIDFromObject'
+load("jstests/libs/parallelTester.js"); // for 'Thread'
+load("jstests/replsets/rslib.js"); // for 'setLogVerbosity'
+load("jstests/replsets/libs/tenant_migration_test.js");
+load("jstests/replsets/libs/tenant_migration_util.js");
+
+const tenantMigrationTest = new TenantMigrationTest({name: jsTestName()});
+if (!tenantMigrationTest.isFeatureFlagEnabled()) {
+ jsTestLog("Skipping test because the tenant migrations feature flag is disabled");
+ return;
+}
+
+const tenantId = "testTenantId";
+const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
+const collName = "testColl";
+
+const donorRst = tenantMigrationTest.getDonorRst();
+const donorPrimary = tenantMigrationTest.getDonorPrimary();
+const donorDB = donorPrimary.getDB(dbName);
+
+setLogVerbosity([donorPrimary], {"tenantMigration": {"verbosity": 3}});
+
+tenantMigrationTest.insertDonorDB(dbName, collName);
+
+const migrationId = UUID();
+const migrationIdString = extractUUIDFromObject(migrationId);
+const migrationOpts = {
+ migrationIdString: migrationIdString,
+ recipientConnString: tenantMigrationTest.getRecipientConnString(),
+ tenantId: tenantId,
+};
+
+const hangWhileMigratingFP =
+ configureFailPoint(donorDB, "pauseTenantMigrationBeforeLeavingAbortingIndexBuildsState");
+
+// Start a migration and wait for donor to hang at the failpoint.
+assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
+
+hangWhileMigratingFP.wait();
+
+// Initiate a downgrade and let it complete.
+assert.commandWorked(
+ donorPrimary.adminCommand({setFeatureCompatibilityVersion: lastContinuousFCV}));
+
+// Upgrade again and finish the test.
+assert.commandWorked(donorPrimary.adminCommand({setFeatureCompatibilityVersion: latestFCV}));
+
+hangWhileMigratingFP.off();
+
+const stateRes =
+ assert.commandWorked(tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
+assert.eq(stateRes.state, TenantMigrationTest.DonorState.kAborted);
+assert.eq(stateRes.abortReason.code, ErrorCodes.TenantMigrationAborted);
+
+tenantMigrationTest.waitForDonorNodesToReachState(
+ donorRst.nodes, migrationId, tenantId, TenantMigrationTest.DonorState.kAborted);
+
+assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));
+
+tenantMigrationTest.stop();
+})(); \ No newline at end of file
diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
index b66bb57dbbd..c515f60f036 100644
--- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
+++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
@@ -49,10 +49,12 @@
#include "mongo/db/namespace_string.h"
#include "mongo/db/ops/write_ops.h"
#include "mongo/db/read_write_concern_defaults.h"
+#include "mongo/db/repl/primary_only_service.h"
#include "mongo/db/repl/repl_client_info.h"
#include "mongo/db/repl/repl_server_parameters_gen.h"
#include "mongo/db/repl/repl_set_config.h"
#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/repl/tenant_migration_donor_service.h"
#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/db/s/sharding_ddl_coordinator_service.h"
#include "mongo/db/server_options.h"
@@ -411,6 +413,8 @@ private:
opCtx, CommandHelpers::appendMajorityWriteConcern(requestPhase1.toBSON({}))));
}
+ _cancelTenantMigrations(opCtx);
+
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
const bool isReplSet =
replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet;
@@ -535,6 +539,8 @@ private:
opCtx, CommandHelpers::appendMajorityWriteConcern(requestPhase1.toBSON({}))));
}
+ _cancelTenantMigrations(opCtx);
+
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
const bool isReplSet =
replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet;
@@ -642,6 +648,20 @@ private:
"last_continuous_version"_attr = FCVP::kLastContinuous);
}
+ /**
+ * Kills all tenant migrations active on this node, for both donors and recipients.
+ * Called after reaching an upgrading or downgrading state.
+ */
+ void _cancelTenantMigrations(OperationContext* opCtx) {
+ invariant(serverGlobalParams.featureCompatibility.isUpgradingOrDowngrading());
+ if (serverGlobalParams.clusterRole == ClusterRole::None) {
+ auto donorService = checked_cast<TenantMigrationDonorService*>(
+ repl::PrimaryOnlyServiceRegistry::get(opCtx->getServiceContext())
+ ->lookupServiceByName(TenantMigrationDonorService::kServiceName));
+ donorService->abortAllMigrations(opCtx);
+ }
+ }
+
} setFeatureCompatibilityVersionCommand;
} // namespace
diff --git a/src/mongo/db/repl/primary_only_service.cpp b/src/mongo/db/repl/primary_only_service.cpp
index c743fc1f489..1a9e0f99705 100644
--- a/src/mongo/db/repl/primary_only_service.cpp
+++ b/src/mongo/db/repl/primary_only_service.cpp
@@ -535,6 +535,36 @@ boost::optional<std::shared_ptr<PrimaryOnlyService::Instance>> PrimaryOnlyServic
return it->second.getInstance();
}
+std::vector<std::shared_ptr<PrimaryOnlyService::Instance>> PrimaryOnlyService::getAllInstances(
+ OperationContext* opCtx) {
+ // If this operation is holding any database locks, then it must have opted into getting
+ // interrupted at stepdown to prevent deadlocks.
+ invariant(!opCtx->lockState()->isLocked() || opCtx->shouldAlwaysInterruptAtStepDownOrUp() ||
+ opCtx->lockState()->wasGlobalLockTakenInModeConflictingWithWrites());
+
+ std::vector<std::shared_ptr<PrimaryOnlyService::Instance>> instances;
+
+ stdx::unique_lock lk(_mutex);
+ opCtx->waitForConditionOrInterrupt(
+ _rebuildCV, lk, [this]() { return _state != State::kRebuilding; });
+
+ if (_state == State::kShutdown || _state == State::kPaused) {
+ invariant(_activeInstances.empty());
+ return instances;
+ }
+ if (_state == State::kRebuildFailed) {
+ uassertStatusOK(_rebuildStatus);
+ return instances;
+ }
+ invariant(_state == State::kRunning);
+
+ for (auto& [instanceId, instance] : _activeInstances) {
+ instances.emplace_back(instance.getInstance());
+ }
+
+ return instances;
+}
+
void PrimaryOnlyService::releaseInstance(const InstanceID& id, Status status) {
auto savedInstanceNodeHandle = [&]() {
stdx::lock_guard lk(_mutex);
diff --git a/src/mongo/db/repl/primary_only_service.h b/src/mongo/db/repl/primary_only_service.h
index 946245f62b8..9daa31881c9 100644
--- a/src/mongo/db/repl/primary_only_service.h
+++ b/src/mongo/db/repl/primary_only_service.h
@@ -331,6 +331,11 @@ protected:
*/
std::shared_ptr<executor::TaskExecutor> getInstanceCleanupExecutor() const;
+ /**
+ * Returns shared pointers to all Instance objects that belong to this service.
+ */
+ std::vector<std::shared_ptr<Instance>> getAllInstances(OperationContext* opCtx);
+
private:
/**
* Represents a PrimaryOnlyService::Instance that has already been scheduled to be run.
diff --git a/src/mongo/db/repl/tenant_migration_donor_service.cpp b/src/mongo/db/repl/tenant_migration_donor_service.cpp
index 77e6f66e34c..35023a2f7b7 100644
--- a/src/mongo/db/repl/tenant_migration_donor_service.cpp
+++ b/src/mongo/db/repl/tenant_migration_donor_service.cpp
@@ -141,6 +141,15 @@ void setPromiseOkIfNotReady(WithLock lk, Promise& promise) {
} // namespace
+void TenantMigrationDonorService::abortAllMigrations(OperationContext* opCtx) {
+ LOGV2(5356301, "Aborting all tenant migrations on donor");
+ auto instances = getAllInstances(opCtx);
+ for (auto& instance : instances) {
+ auto typedInstance = checked_pointer_cast<TenantMigrationDonorService::Instance>(instance);
+ typedInstance->onReceiveDonorAbortMigration();
+ }
+}
+
// Note this index is required on both the donor and recipient in a tenant migration, since each
// will copy cluster time keys from the other. The donor service is set up on all mongods on stepup
// to primary, so this index will be created on both donors and recipients.
@@ -740,6 +749,13 @@ SemiFuture<void> TenantMigrationDonorService::Instance::run(
}
}
+ // We must abort the migration if we try to start or resume while upgrading or downgrading.
+ // (Generic FCV reference): This FCV check should exist across LTS binary versions.
+ if (serverGlobalParams.featureCompatibility.isUpgradingOrDowngrading()) {
+ LOGV2(5356302, "Must abort tenant migration as donor is upgrading or downgrading");
+ onReceiveDonorAbortMigration();
+ }
+
auto abortToken = _initAbortMigrationSource(token);
auto recipientTargeterRS = std::make_shared<RemoteCommandTargeterRS>(
diff --git a/src/mongo/db/repl/tenant_migration_donor_service.h b/src/mongo/db/repl/tenant_migration_donor_service.h
index 842d2e8c0d5..799f02af7f8 100644
--- a/src/mongo/db/repl/tenant_migration_donor_service.h
+++ b/src/mongo/db/repl/tenant_migration_donor_service.h
@@ -68,6 +68,11 @@ public:
_serviceContext, this, initialState);
}
+ /**
+ * Sends an abort to all tenant migration instances on this donor.
+ */
+ void abortAllMigrations(OperationContext* opCtx);
+
class Instance final : public PrimaryOnlyService::TypedInstance<Instance> {
public:
struct DurableState {