author     Allison Easton <allison.easton@mongodb.com>       2022-05-16 16:59:22 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2022-05-19 12:10:12 +0000
commit     ea66e125bf6368da4146c1d8974c595e29787542 (patch)
tree       b77f80604846328130fc0226a8bd1b862d894046 /src
parent     856c10ff0da11b45e1a8462e48f93cf03381485c (diff)
SERVER-65930 DDL coordinators and rename participant initial checkpoint may incur a DuplicateKey error
(cherry picked from commit 1eb5a9257b3bfc0c768b342d73c3668cc6566841)
Diffstat (limited to 'src')
-rw-r--r--  src/mongo/db/s/rename_collection_participant_service.cpp | 19
-rw-r--r--  src/mongo/db/s/sharding_ddl_coordinator.cpp              |  7
-rw-r--r--  src/mongo/db/s/sharding_ddl_coordinator.h                | 17
3 files changed, 33 insertions(+), 10 deletions(-)
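
Both checkpoint paths below (the rename participant's _enterPhase and the DDL coordinator's _insertStateDocument) get the same treatment: the initial state-document insert may be retried after a step-down/step-up sequence, so a DuplicateKey error is reinterpreted as "the document is already persisted locally", and the node falls back to waiting for that earlier write to become majority-committed. A minimal standalone model of that control flow, using toy stand-ins rather than the server's PersistentTaskStore or WaitForMajorityService APIs:

// Minimal standalone model of the pattern (toy stand-ins, not server code).
#include <iostream>
#include <set>
#include <stdexcept>
#include <string>

struct DuplicateKey : std::runtime_error {
    using std::runtime_error::runtime_error;
};

std::set<std::string> gStore;  // stands in for the persistent task store

void storeAdd(const std::string& id) {
    if (!gStore.insert(id).second)
        throw DuplicateKey("E11000 duplicate key: " + id);
}

void waitUntilMajority(const std::string& id) {
    // The real code waits on the node's last applied opTime; just log here.
    std::cout << "waiting for majority commit of " << id << '\n';
}

void initialCheckpoint(const std::string& id) {
    try {
        storeAdd(id);  // majority write concern, no timeout
    } catch (const DuplicateKey&) {
        // A prior primary term already persisted the document locally; the
        // insert is a no-op, but majority commit must still be awaited.
        waitUntilMajority(id);
    }
}

int main() {
    initialCheckpoint("ddl-coordinator-42");
    initialCheckpoint("ddl-coordinator-42");  // retried after failover: safe
}

Running it twice with the same id exercises the fallback path, mirroring a coordinator that re-runs its first phase after a failover.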
diff --git a/src/mongo/db/s/rename_collection_participant_service.cpp b/src/mongo/db/s/rename_collection_participant_service.cpp
index 4d9efb66056..51256975393 100644
--- a/src/mongo/db/s/rename_collection_participant_service.cpp
+++ b/src/mongo/db/s/rename_collection_participant_service.cpp
@@ -201,12 +201,23 @@ void RenameParticipantInstance::_enterPhase(Phase newPhase) {
     PersistentTaskStore<StateDoc> store(NamespaceString::kShardingRenameParticipantsNamespace);
 
     if (_doc.getPhase() == Phase::kUnset) {
-        store.add(opCtx.get(), newDoc, WriteConcerns::kMajorityWriteConcernShardingTimeout);
+        try {
+            store.add(opCtx.get(), newDoc, WriteConcerns::kMajorityWriteConcernNoTimeout);
+        } catch (const ExceptionFor<ErrorCodes::DuplicateKey>&) {
+            // A series of step-up and step-down events can cause a node to try and insert the
+            // document when it has already been persisted locally, but we must still wait for
+            // majority commit.
+            const auto replCoord = repl::ReplicationCoordinator::get(opCtx.get());
+            const auto lastLocalOpTime = replCoord->getMyLastAppliedOpTime();
+            WaitForMajorityService::get(opCtx->getServiceContext())
+                .waitUntilMajority(lastLocalOpTime, opCtx.get()->getCancellationToken())
+                .get(opCtx.get());
+        }
     } else {
         store.update(opCtx.get(),
                      BSON(StateDoc::kFromNssFieldName << fromNss().ns()),
                      newDoc.toBSON(),
-                     WriteConcerns::kMajorityWriteConcernShardingTimeout);
+                     WriteConcerns::kMajorityWriteConcernNoTimeout);
     }
 
     _doc = std::move(newDoc);
@@ -222,7 +233,7 @@ void RenameParticipantInstance::_removeStateDocument(OperationContext* opCtx) {
     PersistentTaskStore<StateDoc> store(NamespaceString::kShardingRenameParticipantsNamespace);
     store.remove(opCtx,
                  BSON(StateDoc::kFromNssFieldName << fromNss().ns()),
-                 WriteConcerns::kMajorityWriteConcernShardingTimeout);
+                 WriteConcerns::kMajorityWriteConcernNoTimeout);
 
     _doc = {};
 }
@@ -387,7 +398,7 @@ SemiFuture<void> RenameParticipantInstance::_runImpl(
                 service->releaseRecoverableCriticalSection(
                     opCtx, fromNss(), reason, ShardingCatalogClient::kLocalWriteConcern);
                 service->releaseRecoverableCriticalSection(
-                    opCtx, toNss(), reason, ShardingCatalogClient::kMajorityWriteConcern);
+                    opCtx, toNss(), reason, WriteConcerns::kMajorityWriteConcernNoTimeout);
 
                 LOGV2(5515107, "CRUD unblocked", "fromNs"_attr = fromNss(), "toNs"_attr = toNss());
             }))
diff --git a/src/mongo/db/s/sharding_ddl_coordinator.cpp b/src/mongo/db/s/sharding_ddl_coordinator.cpp
index 28741f4503b..9ce509fe4b6 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator.cpp
+++ b/src/mongo/db/s/sharding_ddl_coordinator.cpp
@@ -116,10 +116,9 @@ bool ShardingDDLCoordinator::_removeDocument(OperationContext* opCtx) {
     batchedResponse.parseBSON(commandReply, &unusedErrmsg);
 
     WriteConcernResult ignoreResult;
-    const WriteConcernOptions majorityWriteConcern{
-        WriteConcernOptions::kMajority,
-        WriteConcernOptions::SyncMode::UNSET,
-        WriteConcernOptions::kWriteConcernTimeoutSharding};
+    const WriteConcernOptions majorityWriteConcern{WriteConcernOptions::kMajority,
+                                                   WriteConcernOptions::SyncMode::UNSET,
+                                                   WriteConcernOptions::kNoTimeout};
     auto latestOpTime = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
     uassertStatusOK(waitForWriteConcern(opCtx, latestOpTime, majorityWriteConcern, &ignoreResult));
 
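
A second, quieter change runs through the whole commit: every bounded kWriteConcernTimeoutSharding wait becomes a no-timeout majority wait. A bounded wait can fail spuriously when majority commit is merely slow, aborting a coordinator that is built to resume; an unbounded wait simply blocks until the commit point catches up (in the server code the wait stays interruptible through the operation's cancellation token). A rough standalone illustration of the difference, with plain condition variables standing in for replication:

// Toy illustration of bounded vs. unbounded majority waits (not MongoDB code).
#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

std::mutex m;
std::condition_variable cv;
bool majorityCommitted = false;

int main() {
    std::thread replication([] {
        std::this_thread::sleep_for(std::chrono::milliseconds(200));
        { std::lock_guard<std::mutex> lk(m); majorityCommitted = true; }
        cv.notify_all();
    });

    std::unique_lock<std::mutex> lk(m);
    // Bounded wait: times out even though the write commits shortly after,
    // the analogue of a spurious write-concern failure.
    if (!cv.wait_for(lk, std::chrono::milliseconds(50),
                     [] { return majorityCommitted; }))
        std::cout << "bounded wait: spurious failure\n";

    // Unbounded wait: simply blocks until the commit actually happens.
    cv.wait(lk, [] { return majorityCommitted; });
    std::cout << "unbounded wait: majority commit observed\n";

    replication.join();
}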
diff --git a/src/mongo/db/s/sharding_ddl_coordinator.h b/src/mongo/db/s/sharding_ddl_coordinator.h
index e549a13f2a4..fdb4d35014b 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator.h
+++ b/src/mongo/db/s/sharding_ddl_coordinator.h
@@ -32,6 +32,8 @@
 #include "mongo/db/namespace_string.h"
 #include "mongo/db/operation_context.h"
 #include "mongo/db/persistent_task_store.h"
+#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/repl/wait_for_majority_service.h"
 #include "mongo/db/s/dist_lock_manager.h"
 #include "mongo/db/s/forwardable_operation_metadata.h"
 #include "mongo/db/s/sharding_ddl_coordinator_gen.h"
@@ -129,7 +131,18 @@ protected:
 
         auto opCtx = cc().makeOperationContext();
         PersistentTaskStore<StateDoc> store(NamespaceString::kShardingDDLCoordinatorsNamespace);
-        store.add(opCtx.get(), newDoc, WriteConcerns::kMajorityWriteConcernShardingTimeout);
+        try {
+            store.add(opCtx.get(), newDoc, WriteConcerns::kMajorityWriteConcernNoTimeout);
+        } catch (const ExceptionFor<ErrorCodes::DuplicateKey>&) {
+            // A series of step-up and step-down events can cause a node to try and insert the
+            // document when it has already been persisted locally, but we must still wait for
+            // majority commit.
+            const auto replCoord = repl::ReplicationCoordinator::get(opCtx.get());
+            const auto lastLocalOpTime = replCoord->getMyLastAppliedOpTime();
+            WaitForMajorityService::get(opCtx->getServiceContext())
+                .waitUntilMajority(lastLocalOpTime, opCtx.get()->getCancellationToken())
+                .get(opCtx.get());
+        }
 
         return std::move(newDoc);
     }
@@ -141,7 +154,7 @@ protected:
         store.update(opCtx,
                      BSON(StateDoc::kIdFieldName << newDoc.getId().toBSON()),
                      newDoc.toBSON(),
-                     WriteConcerns::kMajorityWriteConcernShardingTimeout);
+                     WriteConcerns::kMajorityWriteConcernNoTimeout);
 
         return std::move(newDoc);
     }