author | Silvia Surroca <silvia.surroca@mongodb.com> | 2022-06-07 07:31:12 +0000
---|---|---
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-06-07 07:57:45 +0000
commit | 2f3753e5c76a4dd71406ec445d045d3d1020bc4b (patch) |
tree | 5214bcc62df31fa4456aee4323d3ff61fd8ce217 /src/mongo |
parent | 960265d209ee5b4bef69b618e4cd1053b46105bf (diff) |
download | mongo-2f3753e5c76a4dd71406ec445d045d3d1020bc4b.tar.gz |
SERVER-66336 ConfigsvrCoordinators initial checkpoint may incur a DuplicateKey error
Diffstat (limited to 'src/mongo')
3 files changed, 32 insertions, 9 deletions
diff --git a/src/mongo/db/s/config/configsvr_coordinator.h b/src/mongo/db/s/config/configsvr_coordinator.h
index ff6d6de23dc..4f9310a3913 100644
--- a/src/mongo/db/s/config/configsvr_coordinator.h
+++ b/src/mongo/db/s/config/configsvr_coordinator.h
@@ -81,7 +81,7 @@ protected:
         store.update(opCtx,
                      BSON(StateDoc::kIdFieldName << newDoc.getId().toBSON()),
                      newDoc.toBSON(),
-                     WriteConcerns::kMajorityWriteConcernShardingTimeout);
+                     WriteConcerns::kMajorityWriteConcernNoTimeout);
     }
 
     template <typename StateDoc>
diff --git a/src/mongo/db/s/config/set_cluster_parameter_coordinator.cpp b/src/mongo/db/s/config/set_cluster_parameter_coordinator.cpp
index 74116b0dc92..36543c7cbaa 100644
--- a/src/mongo/db/s/config/set_cluster_parameter_coordinator.cpp
+++ b/src/mongo/db/s/config/set_cluster_parameter_coordinator.cpp
@@ -36,6 +36,8 @@
 #include "mongo/db/commands/cluster_server_parameter_cmds_gen.h"
 #include "mongo/db/commands/set_cluster_parameter_invocation.h"
 #include "mongo/db/repl/read_concern_args.h"
+#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/repl/wait_for_majority_service.h"
 #include "mongo/db/s/config/sharding_catalog_manager.h"
 #include "mongo/db/s/sharding_logging.h"
 #include "mongo/db/s/sharding_util.h"
@@ -89,15 +91,23 @@ void SetClusterParameterCoordinator::_enterPhase(Phase newPhase) {
                 "oldPhase"_attr = SetClusterParameterCoordinatorPhase_serializer(_doc.getPhase()));
 
     auto opCtx = cc().makeOperationContext();
-    PersistentTaskStore<StateDoc> store(NamespaceString::kConfigsvrCoordinatorsNamespace);
 
     if (_doc.getPhase() == Phase::kUnset) {
-        store.add(opCtx.get(), newDoc, WriteConcerns::kMajorityWriteConcernShardingTimeout);
+        PersistentTaskStore<StateDoc> store(NamespaceString::kConfigsvrCoordinatorsNamespace);
+        try {
+            store.add(opCtx.get(), newDoc, WriteConcerns::kMajorityWriteConcernNoTimeout);
+        } catch (const ExceptionFor<ErrorCodes::DuplicateKey>&) {
+            // A series of step-up and step-down events can cause a node to try and insert the
+            // document when it has already been persisted locally, but we must still wait for
+            // majority commit.
+            const auto replCoord = repl::ReplicationCoordinator::get(opCtx.get());
+            const auto lastLocalOpTime = replCoord->getMyLastAppliedOpTime();
+            WaitForMajorityService::get(opCtx->getServiceContext())
+                .waitUntilMajority(lastLocalOpTime, opCtx.get()->getCancellationToken())
+                .get(opCtx.get());
+        }
     } else {
-        store.update(opCtx.get(),
-                     BSON(StateDoc::kIdFieldName << _coordId.toBSON()),
-                     newDoc.toBSON(),
-                     WriteConcerns::kMajorityWriteConcernNoTimeout);
+        _updateStateDocument(opCtx.get(), newDoc);
     }
 
     _doc = std::move(newDoc);
diff --git a/src/mongo/db/s/config/set_user_write_block_mode_coordinator.cpp b/src/mongo/db/s/config/set_user_write_block_mode_coordinator.cpp
index bb120a68771..2c3139f0cff 100644
--- a/src/mongo/db/s/config/set_user_write_block_mode_coordinator.cpp
+++ b/src/mongo/db/s/config/set_user_write_block_mode_coordinator.cpp
@@ -34,6 +34,8 @@
 
 #include "mongo/base/checked_cast.h"
 #include "mongo/db/persistent_task_store.h"
+#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/repl/wait_for_majority_service.h"
 #include "mongo/db/s/config/sharding_catalog_manager.h"
 #include "mongo/db/s/sharding_util.h"
 #include "mongo/db/s/user_writes_recoverable_critical_section_service.h"
@@ -113,10 +115,21 @@ void SetUserWriteBlockModeCoordinator::_enterPhase(Phase newPhase) {
                 "oldPhase"_attr = SetUserWriteBlockModeCoordinatorPhase_serializer(_doc.getPhase()));
 
     auto opCtx = cc().makeOperationContext();
-    PersistentTaskStore<StateDoc> store(NamespaceString::kConfigsvrCoordinatorsNamespace);
 
     if (_doc.getPhase() == Phase::kUnset) {
-        store.add(opCtx.get(), newDoc, WriteConcerns::kMajorityWriteConcernShardingTimeout);
+        PersistentTaskStore<StateDoc> store(NamespaceString::kConfigsvrCoordinatorsNamespace);
+        try {
+            store.add(opCtx.get(), newDoc, WriteConcerns::kMajorityWriteConcernNoTimeout);
+        } catch (const ExceptionFor<ErrorCodes::DuplicateKey>&) {
+            // A series of step-up and step-down events can cause a node to try and insert the
+            // document when it has already been persisted locally, but we must still wait for
+            // majority commit.
+            const auto replCoord = repl::ReplicationCoordinator::get(opCtx.get());
+            const auto lastLocalOpTime = replCoord->getMyLastAppliedOpTime();
+            WaitForMajorityService::get(opCtx->getServiceContext())
+                .waitUntilMajority(lastLocalOpTime, opCtx.get()->getCancellationToken())
+                .get(opCtx.get());
+        }
     } else {
         _updateStateDocument(opCtx.get(), newDoc);
     }
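
For context, both .cpp changes apply the same recovery pattern: attempt the initial insert of the coordinator state document, and if it already exists (a previous primary term persisted it locally before stepping down), treat the DuplicateKey error as success but still wait for the last locally applied opTime to become majority committed. The sketch below is a simplified, standalone illustration of that control flow, not MongoDB code: `FakeStore`, `DuplicateKeyException`, and `waitForMajorityOfLastLocalOpTime` are hypothetical stand-ins for `PersistentTaskStore`, `ExceptionFor<ErrorCodes::DuplicateKey>`, and `WaitForMajorityService`.

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

// Illustrative stand-in for a DuplicateKey error thrown by the storage layer.
struct DuplicateKeyException : std::runtime_error {
    DuplicateKeyException() : std::runtime_error("DuplicateKey") {}
};

// Illustrative stand-in for the persistent task store: a second insert of the
// same document fails, just like a retried coordinator checkpoint would.
struct FakeStore {
    bool hasDoc = false;
    void add(const std::string& doc) {
        if (hasDoc)
            throw DuplicateKeyException();
        hasDoc = true;
        std::cout << "inserted state document: " << doc << "\n";
    }
};

// Illustrative stand-in for waiting until the node's last locally applied
// opTime is majority committed.
void waitForMajorityOfLastLocalOpTime() {
    std::cout << "waiting for last local opTime to be majority committed\n";
}

// The pattern from the patch: insert once, and treat "already inserted" as
// success provided the earlier write is (or becomes) majority committed.
void checkpointInitialState(FakeStore& store) {
    try {
        store.add("{_id: coordinatorId, phase: 'kUnset'}");
    } catch (const DuplicateKeyException&) {
        // A step-down/step-up sequence can re-run this code after the document
        // was already persisted locally; the checkpoint stays idempotent as
        // long as we still wait for majority commit before proceeding.
        waitForMajorityOfLastLocalOpTime();
    }
}

int main() {
    FakeStore store;
    checkpointInitialState(store);  // first attempt inserts the document
    checkpointInitialState(store);  // retry hits DuplicateKey, waits for majority
}
```

The key property is idempotence: re-running the initial checkpoint after a step-down/step-up no longer aborts the coordinator, yet it still does not proceed until the state document is majority committed.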