summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorMatt Broadstone <mbroadst@mongodb.com>2022-05-18 12:14:15 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-05-18 13:00:33 +0000
commit005aabeba20d762170e76e924ccb9b9b663fe983 (patch)
tree5b6ee51ab36e3394d0c2d8034d827a477d8849e8 /src/mongo/db
parentfe5d7920309ac836c7b45daa0f314de442c00636 (diff)
downloadmongo-005aabeba20d762170e76e924ccb9b9b663fe983.tar.gz
SERVER-65076 Add stepdown passthrough suites for shard split
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/repl/tenant_migration_access_blocker_util.cpp7
-rw-r--r--src/mongo/db/serverless/shard_split_donor_op_observer.cpp21
-rw-r--r--src/mongo/db/serverless/shard_split_donor_service.cpp45
-rw-r--r--src/mongo/db/serverless/shard_split_donor_service_test.cpp44
4 files changed, 44 insertions, 73 deletions
diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp
index 0cbea9ac7f3..53e7b24f135 100644
--- a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp
+++ b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp
@@ -448,19 +448,16 @@ void recoverTenantMigrationAccessBlockers(OperationContext* opCtx) {
return true;
}
- // TODO(SERVER-64619) No longer use a dummy connectiong string when it is no longer a
- // required parameter.
- std::string dummmyRecipientConnectionString = "mongodb://FAKE_URI/?replSet=INVALID";
-
auto optionalTenants = doc.getTenantIds();
invariant(optionalTenants);
for (const auto& tenantId : optionalTenants.get()) {
+ invariant(doc.getRecipientConnectionString());
auto mtab = std::make_shared<TenantMigrationDonorAccessBlocker>(
opCtx->getServiceContext(),
doc.getId(),
tenantId.toString(),
MigrationProtocolEnum::kMultitenantMigrations,
- dummmyRecipientConnectionString);
+ doc.getRecipientConnectionString()->toString());
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
.add(tenantId.toString(), mtab);
diff --git a/src/mongo/db/serverless/shard_split_donor_op_observer.cpp b/src/mongo/db/serverless/shard_split_donor_op_observer.cpp
index 900c1249458..ce1d0e55ddf 100644
--- a/src/mongo/db/serverless/shard_split_donor_op_observer.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_op_observer.cpp
@@ -138,14 +138,19 @@ void onBlockerInitialization(OperationContext* opCtx,
// The primary create and sets the tenant access blocker to blocking within the
// ShardSplitDonorService.
if (isSecondary(opCtx)) {
- auto recipientTagName = donorStateDoc.getRecipientTagName();
- auto recipientSetName = donorStateDoc.getRecipientSetName();
- invariant(recipientTagName);
- invariant(recipientSetName);
-
- auto config = repl::ReplicationCoordinator::get(cc().getServiceContext())->getConfig();
- auto recipientConnectionString =
- serverless::makeRecipientConnectionString(config, *recipientTagName, *recipientSetName);
+ auto recipientConnectionString = [stateDoc = donorStateDoc]() {
+ if (stateDoc.getRecipientConnectionString()) {
+ return *stateDoc.getRecipientConnectionString();
+ }
+
+ auto recipientTagName = stateDoc.getRecipientTagName();
+ invariant(recipientTagName);
+ auto recipientSetName = stateDoc.getRecipientSetName();
+ invariant(recipientSetName);
+ auto config = repl::ReplicationCoordinator::get(cc().getServiceContext())->getConfig();
+ return serverless::makeRecipientConnectionString(
+ config, *recipientTagName, *recipientSetName);
+ }();
for (const auto& tenantId : tenantIds) {
auto mtab = std::make_shared<TenantMigrationDonorAccessBlocker>(
diff --git a/src/mongo/db/serverless/shard_split_donor_service.cpp b/src/mongo/db/serverless/shard_split_donor_service.cpp
index 9553aae29df..5ad7b0de654 100644
--- a/src/mongo/db/serverless/shard_split_donor_service.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_service.cpp
@@ -58,7 +58,6 @@ namespace mongo {
namespace {
MONGO_FAIL_POINT_DEFINE(abortShardSplitBeforeLeavingBlockingState);
-MONGO_FAIL_POINT_DEFINE(pauseShardSplitRecipientListenerCompletion);
MONGO_FAIL_POINT_DEFINE(pauseShardSplitBeforeBlockingState);
MONGO_FAIL_POINT_DEFINE(pauseShardSplitAfterBlocking);
MONGO_FAIL_POINT_DEFINE(pauseShardSplitAfterDecision);
@@ -66,6 +65,7 @@ MONGO_FAIL_POINT_DEFINE(skipShardSplitWaitForSplitAcceptance);
MONGO_FAIL_POINT_DEFINE(pauseShardSplitBeforeRecipientCleanup);
MONGO_FAIL_POINT_DEFINE(pauseShardSplitAfterMarkingStateGarbageCollectable);
MONGO_FAIL_POINT_DEFINE(skipShardSplitRecipientCleanup);
+MONGO_FAIL_POINT_DEFINE(pauseShardSplitBeforeLeavingBlockingState);
const Backoff kExponentialBackoff(Seconds(1), Milliseconds::max());
@@ -197,13 +197,6 @@ SemiFuture<void> makeRecipientAcceptSplitFuture(
monitor->shutdown();
}
- LOGV2(6634900,
- "Shutting down shard split recipient listener.",
- "id"_attr = migrationId,
- "recipientReady"_attr = listener->getFuture().isReady());
-
- pauseShardSplitRecipientListenerCompletion.pauseWhileSet();
-
return s;
})
.semi();
@@ -417,10 +410,6 @@ SemiFuture<void> ShardSplitDonorService::DonorStateMachine::run(
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
pauseShardSplitAfterBlocking.pauseWhileSet(opCtx.get());
- if (MONGO_unlikely(abortShardSplitBeforeLeavingBlockingState.shouldFail())) {
- uasserted(ErrorCodes::InternalError, "simulate a shard split error");
- }
-
return _waitForRecipientToReachBlockTimestamp(executor, abortToken);
})
.then([this, executor, abortToken] {
@@ -655,14 +644,34 @@ ShardSplitDonorService::DonorStateMachine::_waitForRecipientToAcceptSplitAndTrig
LOGV2(6142501, "Waiting for recipient to accept the split.", "id"_attr = _migrationId);
- return _splitAcceptancePromise.getFuture()
- .thenRunOn(**executor)
+ return ExecutorFuture(**executor)
+ .then([&]() { return _splitAcceptancePromise.getFuture(); })
+ .then([this] {
+ auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
+ if (MONGO_unlikely(pauseShardSplitBeforeLeavingBlockingState.shouldFail())) {
+ pauseShardSplitBeforeLeavingBlockingState.execute([&](const BSONObj& data) {
+ if (!data.hasField("blockTimeMS")) {
+ pauseShardSplitBeforeLeavingBlockingState.pauseWhileSet(opCtx.get());
+ } else {
+ const auto blockTime = Milliseconds{data.getIntField("blockTimeMS")};
+ LOGV2(8423359,
+ "Keeping shard split in blocking state.",
+ "blockTime"_attr = blockTime);
+ opCtx->sleepFor(blockTime);
+ }
+ });
+ }
+
+ if (MONGO_unlikely(abortShardSplitBeforeLeavingBlockingState.shouldFail())) {
+ uasserted(ErrorCodes::InternalError, "simulate a shard split error");
+ }
+ })
.then([this, recipients, abortToken, remoteCommandExecutor] {
LOGV2(6493901,
- "Triggering an election after applying the split config.",
+ "Triggering an election after recipient has accepted the split.",
"id"_attr = _migrationId);
- // replSetStepUp on a random node will succeed as long as it's note the most out-of-date
+ // replSetStepUp on a random node will succeed as long as it's not the most out-of-date
// node (in that case at least another node will vote for it and the election will
// succeed). Selecting a random node has a 2/3 chance to succeed for replSetStepUp. If
// the first command fail, we know this node is the most out-of-date. Therefore we
@@ -679,7 +688,7 @@ ShardSplitDonorService::DonorStateMachine::_waitForRecipientToAcceptSplitAndTrig
if (!replSetStepUpStatus.isOK()) {
LOGV2(6493904,
"Failed to trigger an election on the recipient replica set.",
- "replSetStatus"_attr = replSetStepUpStatus);
+ "status"_attr = replSetStepUpStatus);
}
// Even if replSetStepUp failed, the recipient nodes have joined the
@@ -690,7 +699,7 @@ ShardSplitDonorService::DonorStateMachine::_waitForRecipientToAcceptSplitAndTrig
});
})
.thenRunOn(**executor)
- .onCompletion([this, executor, abortToken](Status status) {
+ .then([this, executor, abortToken]() {
LOGV2(6142503, "Entering 'committed' state.", "id"_attr = _stateDoc.getId());
return _updateStateDocument(executor, abortToken, ShardSplitDonorStateEnum::kCommitted)
diff --git a/src/mongo/db/serverless/shard_split_donor_service_test.cpp b/src/mongo/db/serverless/shard_split_donor_service_test.cpp
index 3104e978af8..9c939042986 100644
--- a/src/mongo/db/serverless/shard_split_donor_service_test.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_service_test.cpp
@@ -30,6 +30,7 @@
#include <memory>
+#include "mongo/client/connection_string.h"
#include "mongo/client/replica_set_monitor.h"
#include "mongo/client/sdam/server_description_builder.h"
#include "mongo/client/streamable_replica_set_monitor_for_testing.h"
@@ -644,48 +645,6 @@ TEST_F(ShardSplitDonorServiceTest, ShardSplitDonorServiceTimeout) {
ASSERT_TRUE(serviceInstance->isGarbageCollectable());
}
-// Verify the recipient acceptance listener stops when the shard split aborts due to an error. If it
-// does not, the test will timeout.
-TEST_F(ShardSplitDonorServiceTest, ListenerTerminatesOnError) {
- auto opCtx = makeOperationContext();
- auto serviceContext = getServiceContext();
-
- FailPointEnableBlock fp("abortShardSplitBeforeLeavingBlockingState");
-
- auto listenerCompleted =
- std::make_unique<FailPointEnableBlock>("pauseShardSplitRecipientListenerCompletion");
- const auto initialTimes = listenerCompleted->initialTimesEntered();
-
- // Ensure the listener is created.
- ShardSplitDonorService::DonorStateMachine::setSplitAcceptanceTaskExecutor_forTest(_executor);
- _skipAcceptanceFP.reset();
-
- test::shard_split::ScopedTenantAccessBlocker scopedTenants(_tenantIds, opCtx.get());
- test::shard_split::reconfigToAddRecipientNodes(
- serviceContext, _recipientTagName, _replSet.getHosts(), _recipientSet.getHosts());
-
- auto stateDocument = defaultStateDocument();
-
- auto serviceInstance = ShardSplitDonorService::DonorStateMachine::getOrCreate(
- opCtx.get(), _service, stateDocument.toBSON());
- ASSERT(serviceInstance.get());
-
- auto result = serviceInstance->decisionFuture().get(opCtx.get());
-
- ASSERT(!!result.abortReason);
- ASSERT_EQ(result.abortReason->code(), ErrorCodes::InternalError);
- ASSERT_EQ(result.state, mongo::ShardSplitDonorStateEnum::kAborted);
-
- serviceInstance->tryForget();
-
- ASSERT_OK(serviceInstance->completionFuture().getNoThrow());
- ASSERT_TRUE(serviceInstance->isGarbageCollectable());
-
- // If the listener does not stop, this will hang indefinitely and the test will timeout.
- (*listenerCompleted)->waitForTimesEntered(initialTimes + 1);
- listenerCompleted.reset();
-}
-
// Abort scenario : abortSplit called before startSplit.
TEST_F(ShardSplitDonorServiceTest, CreateInstanceInAbortState) {
auto opCtx = makeOperationContext();
@@ -1015,6 +974,7 @@ public:
auto stateDocument = defaultStateDocument();
stateDocument.setBlockTimestamp(Timestamp(1, 1));
stateDocument.setState(ShardSplitDonorStateEnum::kBlocking);
+ stateDocument.setRecipientConnectionString(ConnectionString::forLocal());
return stateDocument;
}