summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--jstests/serverless/libs/basic_serverless_test.js6
-rw-r--r--jstests/serverless/shard_split_abort_during_upgrade_downgrade.js65
-rw-r--r--jstests/serverless/shard_split_abort_on_setfcv.js57
-rw-r--r--jstests/serverless/shard_split_enabled.js12
-rw-r--r--src/mongo/db/commands/SConscript1
-rw-r--r--src/mongo/db/commands/set_feature_compatibility_version_command.cpp18
-rw-r--r--src/mongo/db/serverless/shard_split_commands.cpp12
-rw-r--r--src/mongo/db/serverless/shard_split_donor_service.cpp21
-rw-r--r--src/mongo/db/serverless/shard_split_donor_service.h2
9 files changed, 176 insertions, 18 deletions
diff --git a/jstests/serverless/libs/basic_serverless_test.js b/jstests/serverless/libs/basic_serverless_test.js
index 0f374cb192c..3c6ae92556b 100644
--- a/jstests/serverless/libs/basic_serverless_test.js
+++ b/jstests/serverless/libs/basic_serverless_test.js
@@ -544,6 +544,12 @@ class BasicServerlessTest {
}
BasicServerlessTest.kConfigSplitDonorsNS = "config.tenantSplitDonors";
+BasicServerlessTest.DonorState = {
+ kUninitialized: "uninitialized",
+ kBlocking: "blocking",
+ kCommitted: "committed",
+ kAborted: "aborted"
+};
function findSplitOperation(primary, migrationId) {
const donorsCollection = primary.getCollection(BasicServerlessTest.kConfigSplitDonorsNS);
diff --git a/jstests/serverless/shard_split_abort_during_upgrade_downgrade.js b/jstests/serverless/shard_split_abort_during_upgrade_downgrade.js
new file mode 100644
index 00000000000..32ddb38b339
--- /dev/null
+++ b/jstests/serverless/shard_split_abort_during_upgrade_downgrade.js
@@ -0,0 +1,65 @@
+/*
+ * Prove that shard splits are aborted during FCV upgrade/downgrade.
+ *
+ * @tags: [requires_fcv_52, featureFlagShardSplit, serverless]
+ */
+
+(function() {
+"use strict";
+load("jstests/libs/fail_point_util.js");
+load("jstests/serverless/libs/basic_serverless_test.js");
+
+// Shard split commands are gated by a feature flag, which will not be supported when we
+// downgrade versions. Eventually, we will run this test when we have two consecutive versions
+// that support `commitShardSplit` without a feature flag. This check will be removed as part
+// of SERVER-66965.
+if (MongoRunner.compareBinVersions(latestFCV, "6.2") < 0) {
+ return;
+}
+
+// Skip db hash check because secondary is left with a different config.
+TestData.skipCheckDBHashes = true;
+const test = new BasicServerlessTest({
+ recipientTagName: "recipientNode",
+ recipientSetName: "recipient",
+ quickGarbageCollection: true
+});
+
+test.addRecipientNodes();
+
+const donorPrimary = test.donor.getPrimary();
+const tenantIds = ["tenant1", "tenant2"];
+
+jsTestLog("Assert shard splits are aborted when downgrading.");
+const downgradeFCV = lastContinuousFCV;
+const hangWhileDowngradingFp = configureFailPoint(donorPrimary, "hangWhileDowngrading");
+const downgradeThread = new Thread((host, downgradeFCV) => {
+ const db = new Mongo(host);
+ assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: downgradeFCV}));
+}, donorPrimary.host, downgradeFCV);
+
+downgradeThread.start();
+hangWhileDowngradingFp.wait();
+const firstSplit = test.createSplitOperation(tenantIds);
+assert.commandFailedWithCode(firstSplit.commit(), ErrorCodes.TenantMigrationAborted);
+hangWhileDowngradingFp.off();
+downgradeThread.join();
+firstSplit.forget();
+
+jsTestLog("Assert shard splits are aborted when upgrading.");
+const hangWhileUpgradingFp = configureFailPoint(donorPrimary, "hangWhileUpgrading");
+const upgradeThread = new Thread((host) => {
+ const db = new Mongo(host);
+ assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: latestFCV}));
+}, donorPrimary.host);
+
+upgradeThread.start();
+hangWhileUpgradingFp.wait();
+const secondSplit = test.createSplitOperation(tenantIds);
+assert.commandFailedWithCode(secondSplit.commit(), ErrorCodes.TenantMigrationAborted);
+hangWhileUpgradingFp.off();
+upgradeThread.join();
+secondSplit.forget();
+
+test.stop();
+})();
diff --git a/jstests/serverless/shard_split_abort_on_setfcv.js b/jstests/serverless/shard_split_abort_on_setfcv.js
new file mode 100644
index 00000000000..5de893203ae
--- /dev/null
+++ b/jstests/serverless/shard_split_abort_on_setfcv.js
@@ -0,0 +1,57 @@
+/*
+ * Prove that shard splits are eagerly aborted when the `setFeatureCompatibilityVersion` command is
+ * received for both upgrade and downgrade paths.
+ *
+ * @tags: [requires_fcv_52, featureFlagShardSplit, serverless]
+ */
+
+(function() {
+"use strict";
+load("jstests/libs/fail_point_util.js");
+load("jstests/serverless/libs/basic_serverless_test.js");
+
+// Skip db hash check because secondary is left with a different config.
+TestData.skipCheckDBHashes = true;
+const test = new BasicServerlessTest({
+ recipientTagName: "recipientNode",
+ recipientSetName: "recipient",
+ quickGarbageCollection: true
+});
+
+test.addRecipientNodes();
+
+const donorPrimary = test.donor.getPrimary();
+const tenantIds = ["tenant1", "tenant2"];
+const pauseAfterBlockingFp = configureFailPoint(donorPrimary, "pauseShardSplitAfterBlocking");
+
+jsTestLog("Test FCV Downgrade");
+const split = test.createSplitOperation(tenantIds);
+const commitThread = split.commitAsync();
+pauseAfterBlockingFp.wait();
+assert.commandWorked(
+ donorPrimary.adminCommand({setFeatureCompatibilityVersion: lastContinuousFCV}));
+pauseAfterBlockingFp.off();
+assert.commandFailedWithCode(commitThread.returnData(), ErrorCodes.TenantMigrationAborted);
+
+jsTestLog("Test FCV Upgrade");
+if (lastContinuousFCV == "6.0") {
+ const secondSplit = test.createSplitOperation(tenantIds);
+ assert.commandFailedWithCode(secondSplit.commit(), ErrorCodes.IllegalOperation);
+} else {
+ // Reached only when the downgraded FCV also supports shard split, so `forgetShardSplit` works.
+ // The fail point was turned off above, so it is configured again before the second split.
+ split.forget();
+ test.cleanupSuccesfulAborted(split.migrationId, tenantIds);
+ const rearmedPauseFp = configureFailPoint(donorPrimary, "pauseShardSplitAfterBlocking");
+ const secondSplit = test.createSplitOperation(tenantIds);
+ const commitThread = secondSplit.commitAsync();
+ rearmedPauseFp.wait();
+ assert.commandWorked(donorPrimary.adminCommand({setFeatureCompatibilityVersion: latestFCV}));
+ rearmedPauseFp.off();
+ assert.commandFailedWithCode(commitThread.returnData(), ErrorCodes.TenantMigrationAborted);
+ secondSplit.forget();
+ test.cleanupSuccesfulAborted(secondSplit.migrationId, tenantIds);
+}
+
+test.stop();
+})();
diff --git a/jstests/serverless/shard_split_enabled.js b/jstests/serverless/shard_split_enabled.js
index 6cc27de9db7..a58aec52d1d 100644
--- a/jstests/serverless/shard_split_enabled.js
+++ b/jstests/serverless/shard_split_enabled.js
@@ -50,13 +50,13 @@ function makeShardSplitTest() {
let commitUUID = UUID();
let res = adminDB.runCommand(test.makeCommitShardSplitCmd(commitUUID));
assert.neq(res.code,
- 6057900,
+ ErrorCodes.IllegalOperation,
`commitShardSplitCmd shouldn't reject when featureFlagShardSplit is enabled`);
test.removeRecipientNodesFromDonor();
res = adminDB.runCommand(test.makeForgetShardSplitCmd(commitUUID));
assert.neq(res.code,
- 6057900,
+ ErrorCodes.IllegalOperation,
`forgetShardSplit shouldn't reject when featureFlagShardSplit is enabled`);
test.waitForGarbageCollection(commitUUID, tenantIds);
@@ -64,22 +64,22 @@ function makeShardSplitTest() {
let abortUUID = UUID();
res = adminDB.runCommand(test.makeAbortShardSplitCmd(abortUUID));
assert.neq(res.code,
- 6057902,
+ ErrorCodes.IllegalOperation,
`abortShardSplitCmd shouldn't reject when featureFlagShardSplit is enabled`);
assert.commandWorked(adminDB.adminCommand({setFeatureCompatibilityVersion: downgradeFCV}));
assert.commandFailedWithCode(
adminDB.runCommand(test.makeCommitShardSplitCmd(UUID())),
- 6057900,
+ ErrorCodes.IllegalOperation,
`commitShardSplitCmd should reject when featureFlagShardSplit is disabled`);
assert.commandFailedWithCode(
adminDB.runCommand(test.makeAbortShardSplitCmd(UUID())),
- 6057902,
+ ErrorCodes.IllegalOperation,
`abortShardSplitCmd should reject when featureFlagShardSplit is disabled`);
assert.commandFailedWithCode(
adminDB.runCommand(test.makeForgetShardSplitCmd(UUID())),
- 6236600,
+ ErrorCodes.IllegalOperation,
`forgetShardSplit should reject when featureFlagShardSplit is disabled`);
// shut down replica set
diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript
index 073190f124e..cf6f79671bc 100644
--- a/src/mongo/db/commands/SConscript
+++ b/src/mongo/db/commands/SConscript
@@ -582,6 +582,7 @@ env.Library(
'$BUILD_DIR/mongo/db/s/user_writes_recoverable_critical_section',
'$BUILD_DIR/mongo/db/server_feature_flags',
'$BUILD_DIR/mongo/db/server_options_core',
+ '$BUILD_DIR/mongo/db/serverless/shard_split_donor_service',
'$BUILD_DIR/mongo/db/tenant_id',
'$BUILD_DIR/mongo/db/timeseries/timeseries_conversion_util',
'$BUILD_DIR/mongo/db/transaction_api',
diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
index 1a045c0e806..ad2084e2ac7 100644
--- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
+++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
@@ -51,6 +51,7 @@
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/dbdirectclient.h"
+#include "mongo/db/global_settings.h"
#include "mongo/db/index_builds_coordinator.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/ops/write_ops.h"
@@ -77,6 +78,7 @@
#include "mongo/db/s/transaction_coordinator_service.h"
#include "mongo/db/server_feature_flags_gen.h"
#include "mongo/db/server_options.h"
+#include "mongo/db/serverless/shard_split_donor_service.h"
#include "mongo/db/session_catalog.h"
#include "mongo/db/session_txn_record_gen.h"
#include "mongo/db/timeseries/timeseries_index_schema_conversion_functions.h"
@@ -532,7 +534,7 @@ private:
opCtx, CommandHelpers::appendMajorityWriteConcern(requestPhase1.toBSON({}))));
}
- _cancelTenantMigrations(opCtx);
+ _cancelServerlessMigrations(opCtx);
{
// Take the FCV full transition lock in S mode to create a barrier for operations taking
@@ -608,7 +610,7 @@ private:
Balancer::get(opCtx)->applyLegacyChunkSizeConstraintsOnClusterData(opCtx);
}
- _cancelTenantMigrations(opCtx);
+ _cancelServerlessMigrations(opCtx);
{
// Take the FCV full transition lock in S mode to create a barrier for operations taking
@@ -797,21 +799,29 @@ private:
}
/**
- * Kills all tenant migrations active on this node, for both donors and recipients.
+ * Aborts the tenant migrations (donor and recipient) and shard splits active on this node.
* Called after reaching an upgrading or downgrading state.
*/
- void _cancelTenantMigrations(OperationContext* opCtx) {
+ void _cancelServerlessMigrations(OperationContext* opCtx) {
invariant(serverGlobalParams.featureCompatibility.isUpgradingOrDowngrading());
if (serverGlobalParams.clusterRole == ClusterRole::None) {
auto donorService = checked_cast<TenantMigrationDonorService*>(
repl::PrimaryOnlyServiceRegistry::get(opCtx->getServiceContext())
->lookupServiceByName(TenantMigrationDonorService::kServiceName));
donorService->abortAllMigrations(opCtx);
+
auto recipientService = checked_cast<repl::TenantMigrationRecipientService*>(
repl::PrimaryOnlyServiceRegistry::get(opCtx->getServiceContext())
->lookupServiceByName(repl::TenantMigrationRecipientService::
kTenantMigrationRecipientServiceName));
recipientService->abortAllMigrations(opCtx);
+
+ if (getGlobalReplSettings().isServerless()) {
+ auto splitDonorService = checked_cast<ShardSplitDonorService*>(
+ repl::PrimaryOnlyServiceRegistry::get(opCtx->getServiceContext())
+ ->lookupServiceByName(ShardSplitDonorService::kServiceName));
+ splitDonorService->abortAllSplits(opCtx);
+ }
}
}
diff --git a/src/mongo/db/serverless/shard_split_commands.cpp b/src/mongo/db/serverless/shard_split_commands.cpp
index 71a2d64203e..9da6c9ce8b8 100644
--- a/src/mongo/db/serverless/shard_split_commands.cpp
+++ b/src/mongo/db/serverless/shard_split_commands.cpp
@@ -50,8 +50,8 @@ public:
using InvocationBase::InvocationBase;
Response typedRun(OperationContext* opCtx) {
- uassert(6057900,
- "Feature \"shard split\" not supported",
+ uassert(ErrorCodes::IllegalOperation,
+ "Feature 'shard split' not supported",
repl::feature_flags::gShardSplit.isEnabled(
serverGlobalParams.featureCompatibility));
uassert(ErrorCodes::IllegalOperation,
@@ -138,8 +138,8 @@ public:
using InvocationBase::InvocationBase;
void typedRun(OperationContext* opCtx) {
- uassert(6057902,
- "Feature \"shard split\" not supported",
+ uassert(ErrorCodes::IllegalOperation,
+ "Feature 'shard split' not supported",
repl::feature_flags::gShardSplit.isEnabled(
serverGlobalParams.featureCompatibility));
uassert(ErrorCodes::CommandNotSupported,
@@ -215,8 +215,8 @@ public:
using InvocationBase::InvocationBase;
void typedRun(OperationContext* opCtx) {
- uassert(6236600,
- "feature \"shard split\" not supported",
+ uassert(ErrorCodes::IllegalOperation,
+ "Feature 'shard split' not supported",
repl::feature_flags::gShardSplit.isEnabled(
serverGlobalParams.featureCompatibility));
uassert(ErrorCodes::CommandNotSupported,
diff --git a/src/mongo/db/serverless/shard_split_donor_service.cpp b/src/mongo/db/serverless/shard_split_donor_service.cpp
index f6f28ed8aaf..f37a9416f5e 100644
--- a/src/mongo/db/serverless/shard_split_donor_service.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_service.cpp
@@ -226,8 +226,8 @@ void ShardSplitDonorService::checkIfConflictsWithOtherInstances(
isGarbageCollectable;
uassert(ErrorCodes::ConflictingOperationInProgress,
- str::stream() << "Can't start a concurent shard split operation against"
- << " migrationId:" << existingTypedInstance->getId(),
+ str::stream() << "Can't start a concurent shard split operation, currently running"
+ << " migrationId: " << existingTypedInstance->getId(),
existingIsAborted);
}
}
@@ -240,6 +240,16 @@ std::shared_ptr<repl::PrimaryOnlyService::Instance> ShardSplitDonorService::cons
ShardSplitDonorDocument::parse(IDLParserErrorContext("donorStateDoc"), initialState));
}
+// Asks every shard split instance currently tracked by this service to abort. Called by the
+// setFeatureCompatibilityVersion command once the node is in an upgrading or downgrading state.
+void ShardSplitDonorService::abortAllSplits(OperationContext* opCtx) {
+ LOGV2(8423361, "Aborting all active shard split operations.");
+ for (const auto& instance : getAllInstances(opCtx)) {
+ // Downcast to the concrete state machine type, which is where tryAbort() is declared.
+ checked_pointer_cast<ShardSplitDonorService::DonorStateMachine>(instance)->tryAbort();
+ }
+}
+
ExecutorFuture<void> ShardSplitDonorService::_createStateDocumentTTLIndex(
std::shared_ptr<executor::ScopedTaskExecutor> executor, const CancellationToken& token) {
return AsyncTry([this] {
@@ -337,6 +347,13 @@ SemiFuture<void> ShardSplitDonorService::DonorStateMachine::run(
_abortSource->cancel();
}
+ // We must abort the migration if we try to start or resume while upgrading or downgrading.
+ // (Generic FCV reference): This FCV check should exist across LTS binary versions.
+ if (serverGlobalParams.featureCompatibility.isUpgradingOrDowngrading()) {
+ LOGV2(8423360, "Aborting shard split since donor is upgrading or downgrading.");
+ _abortSource->cancel();
+ }
+
return _abortSource->token();
}();
diff --git a/src/mongo/db/serverless/shard_split_donor_service.h b/src/mongo/db/serverless/shard_split_donor_service.h
index 36ba21c2a77..9c6c3645de2 100644
--- a/src/mongo/db/serverless/shard_split_donor_service.h
+++ b/src/mongo/db/serverless/shard_split_donor_service.h
@@ -61,6 +61,8 @@ public:
ThreadPool::Limits getThreadPoolLimits() const override;
+ void abortAllSplits(OperationContext* opCtx);
+
protected:
// Instance conflict check not yet implemented.
void checkIfConflictsWithOtherInstances(