diff options
author | Allison Easton <allison.easton@mongodb.com> | 2022-05-16 16:59:22 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-05-19 11:37:39 +0000 |
commit | 2bd6810b6afe06530969d77ffd3931978feef8c3 (patch) | |
tree | 52a67e04746266197fb95fae72d58eb9cb68b719 | |
parent | 533ef33817696b85bac9091c4257a9204da6db95 (diff) | |
download | mongo-2bd6810b6afe06530969d77ffd3931978feef8c3.tar.gz |
SERVER-65930 DDL coordinators and rename participant initial checkpoint may incur in DuplicateKey error (tag: r5.3.2-rc1)
(cherry picked from commit 1eb5a9257b3bfc0c768b342d73c3668cc6566841)
-rw-r--r-- | src/mongo/db/s/rename_collection_participant_service.cpp | 19 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_ddl_coordinator.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_ddl_coordinator.h | 17 |
3 files changed, 33 insertions, 10 deletions
diff --git a/src/mongo/db/s/rename_collection_participant_service.cpp b/src/mongo/db/s/rename_collection_participant_service.cpp index bf2766e668d..adf0f306c96 100644 --- a/src/mongo/db/s/rename_collection_participant_service.cpp +++ b/src/mongo/db/s/rename_collection_participant_service.cpp @@ -201,12 +201,23 @@ void RenameParticipantInstance::_enterPhase(Phase newPhase) { PersistentTaskStore<StateDoc> store(NamespaceString::kShardingRenameParticipantsNamespace); if (_doc.getPhase() == Phase::kUnset) { - store.add(opCtx.get(), newDoc, WriteConcerns::kMajorityWriteConcernShardingTimeout); + try { + store.add(opCtx.get(), newDoc, WriteConcerns::kMajorityWriteConcernNoTimeout); + } catch (const ExceptionFor<ErrorCodes::DuplicateKey>&) { + // A series of step-up and step-down events can cause a node to try and insert the + // document when it has already been persisted locally, but we must still wait for + // majority commit. + const auto replCoord = repl::ReplicationCoordinator::get(opCtx.get()); + const auto lastLocalOpTime = replCoord->getMyLastAppliedOpTime(); + WaitForMajorityService::get(opCtx->getServiceContext()) + .waitUntilMajority(lastLocalOpTime, opCtx.get()->getCancellationToken()) + .get(opCtx.get()); + } } else { store.update(opCtx.get(), BSON(StateDoc::kFromNssFieldName << fromNss().ns()), newDoc.toBSON(), - WriteConcerns::kMajorityWriteConcernShardingTimeout); + WriteConcerns::kMajorityWriteConcernNoTimeout); } _doc = std::move(newDoc); @@ -222,7 +233,7 @@ void RenameParticipantInstance::_removeStateDocument(OperationContext* opCtx) { PersistentTaskStore<StateDoc> store(NamespaceString::kShardingRenameParticipantsNamespace); store.remove(opCtx, BSON(StateDoc::kFromNssFieldName << fromNss().ns()), - WriteConcerns::kMajorityWriteConcernShardingTimeout); + WriteConcerns::kMajorityWriteConcernNoTimeout); _doc = {}; } @@ -377,7 +388,7 @@ SemiFuture<void> RenameParticipantInstance::_runImpl( service->releaseRecoverableCriticalSection( opCtx, fromNss(), 
reason, ShardingCatalogClient::kLocalWriteConcern); service->releaseRecoverableCriticalSection( - opCtx, toNss(), reason, ShardingCatalogClient::kMajorityWriteConcern); + opCtx, toNss(), reason, WriteConcerns::kMajorityWriteConcernNoTimeout); LOGV2(5515107, "CRUD unblocked", "fromNs"_attr = fromNss(), "toNs"_attr = toNss()); })) diff --git a/src/mongo/db/s/sharding_ddl_coordinator.cpp b/src/mongo/db/s/sharding_ddl_coordinator.cpp index ffc0e4c7741..38d3d663af2 100644 --- a/src/mongo/db/s/sharding_ddl_coordinator.cpp +++ b/src/mongo/db/s/sharding_ddl_coordinator.cpp @@ -122,10 +122,9 @@ bool ShardingDDLCoordinator::_removeDocument(OperationContext* opCtx) { batchedResponse.parseBSON(commandReply, &unusedErrmsg); WriteConcernResult ignoreResult; - const WriteConcernOptions majorityWriteConcern{ - WriteConcernOptions::kMajority, - WriteConcernOptions::SyncMode::UNSET, - WriteConcernOptions::kWriteConcernTimeoutSharding}; + const WriteConcernOptions majorityWriteConcern{WriteConcernOptions::kMajority, + WriteConcernOptions::SyncMode::UNSET, + WriteConcernOptions::kNoTimeout}; auto latestOpTime = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp(); uassertStatusOK(waitForWriteConcern(opCtx, latestOpTime, majorityWriteConcern, &ignoreResult)); diff --git a/src/mongo/db/s/sharding_ddl_coordinator.h b/src/mongo/db/s/sharding_ddl_coordinator.h index c9e15c6fdf2..c41eb6f6cec 100644 --- a/src/mongo/db/s/sharding_ddl_coordinator.h +++ b/src/mongo/db/s/sharding_ddl_coordinator.h @@ -33,6 +33,8 @@ #include "mongo/db/namespace_string.h" #include "mongo/db/operation_context.h" #include "mongo/db/persistent_task_store.h" +#include "mongo/db/repl/replication_coordinator.h" +#include "mongo/db/repl/wait_for_majority_service.h" #include "mongo/db/s/dist_lock_manager.h" #include "mongo/db/s/forwardable_operation_metadata.h" #include "mongo/db/s/sharding_ddl_coordinator_gen.h" @@ -114,7 +116,18 @@ protected: auto opCtx = cc().makeOperationContext(); 
PersistentTaskStore<StateDoc> store(NamespaceString::kShardingDDLCoordinatorsNamespace); - store.add(opCtx.get(), newDoc, WriteConcerns::kMajorityWriteConcernShardingTimeout); + try { + store.add(opCtx.get(), newDoc, WriteConcerns::kMajorityWriteConcernNoTimeout); + } catch (const ExceptionFor<ErrorCodes::DuplicateKey>&) { + // A series of step-up and step-down events can cause a node to try and insert the + // document when it has already been persisted locally, but we must still wait for + // majority commit. + const auto replCoord = repl::ReplicationCoordinator::get(opCtx.get()); + const auto lastLocalOpTime = replCoord->getMyLastAppliedOpTime(); + WaitForMajorityService::get(opCtx->getServiceContext()) + .waitUntilMajority(lastLocalOpTime, opCtx.get()->getCancellationToken()) + .get(opCtx.get()); + } return std::move(newDoc); } @@ -126,7 +139,7 @@ protected: store.update(opCtx, BSON(StateDoc::kIdFieldName << newDoc.getId().toBSON()), newDoc.toBSON(), - WriteConcerns::kMajorityWriteConcernShardingTimeout); + WriteConcerns::kMajorityWriteConcernNoTimeout); return std::move(newDoc); } |