From 8bb21e771efc237f95da64bcd7fcb86759ed1098 Mon Sep 17 00:00:00 2001 From: Moustafa Maher Date: Mon, 26 Jul 2021 19:39:16 +0000 Subject: SERVER-56846 Fail reconfig for shard server if it would change DWCF and CWWC is not set --- .../repl/replication_coordinator_external_state.h | 6 ++ ...replication_coordinator_external_state_impl.cpp | 6 ++ .../replication_coordinator_external_state_impl.h | 2 + ...replication_coordinator_external_state_mock.cpp | 5 ++ .../replication_coordinator_external_state_mock.h | 2 + src/mongo/db/repl/replication_coordinator_impl.cpp | 67 ++++++++++++++++------ 6 files changed, 70 insertions(+), 18 deletions(-) (limited to 'src/mongo/db/repl') diff --git a/src/mongo/db/repl/replication_coordinator_external_state.h b/src/mongo/db/repl/replication_coordinator_external_state.h index 02b8e71ef20..3fb2ecfc61a 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state.h +++ b/src/mongo/db/repl/replication_coordinator_external_state.h @@ -328,6 +328,12 @@ public: * Assert will be raised if running a command on the config server failed. */ virtual bool isCWWCSetOnConfigShard(OperationContext* opCtx) const = 0; + + /** + * Used to check if the server is a shardServer and has been added to a sharded cluster via + * addShard. + */ + virtual bool isShardPartOfShardedCluster(OperationContext* opCtx) const = 0; }; } // namespace repl diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp index 23606fedb07..36c740ffa57 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp @@ -1132,6 +1132,12 @@ void ReplicationCoordinatorExternalStateImpl::setupNoopWriter(Seconds waitTime) _noopWriter = std::make_unique(waitTime); } +bool ReplicationCoordinatorExternalStateImpl::isShardPartOfShardedCluster( + OperationContext* opCtx) const { + return serverGlobalParams.clusterRole == ClusterRole::ShardServer && + ShardingState::get(opCtx)->enabled(); +} + bool ReplicationCoordinatorExternalStateImpl::isCWWCSetOnConfigShard( OperationContext* opCtx) const { GetDefaultRWConcern configsvrRequest; diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.h b/src/mongo/db/repl/replication_coordinator_external_state_impl.h index 62fa04c7caf..39d684a55e3 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.h +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.h @@ -123,6 +123,8 @@ public: virtual bool isCWWCSetOnConfigShard(OperationContext* opCtx) const final; + virtual bool isShardPartOfShardedCluster(OperationContext* opCtx) const final; + private: /** * Stops data replication and returns with 'lock' locked. diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp index dc25c08fb9c..d2607393528 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp @@ -322,5 +322,10 @@ bool ReplicationCoordinatorExternalStateMock::isCWWCSetOnConfigShard( return true; } +bool ReplicationCoordinatorExternalStateMock::isShardPartOfShardedCluster( + OperationContext* opCtx) const { + return true; +} + } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.h b/src/mongo/db/repl/replication_coordinator_external_state_mock.h index fb74a55e2e3..22b2a7444b7 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_mock.h +++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.h @@ -198,6 +198,8 @@ public: virtual bool isCWWCSetOnConfigShard(OperationContext* opCtx) const final; + virtual bool isShardPartOfShardedCluster(OperationContext* opCtx) const final; + private: StatusWith _localRsConfigDocument; StatusWith _localRsLastVoteDocument; diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 15c39da1255..32c95fdaf50 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -3502,26 +3502,57 @@ Status ReplicationCoordinatorImpl::_doReplSetReconfig(OperationContext* opCtx, return status; ReplSetConfig newConfig = newConfigStatus.getValue(); - // If the new config changes the replica set's implicit default write concern, we fail the - // reconfig command. This includes force reconfigs, but excludes reconfigs that bump the config - // term during step-up. The user should set a cluster-wide write concern and attempt the - // reconfig command again. We also need to exclude shard servers from this validation, as shard - // servers don't store the cluster-wide write concern. - if (!skipSafetyChecks /* skipping step-up reconfig */ && - repl::feature_flags::gDefaultWCMajority.isEnabled( - serverGlobalParams.featureCompatibility) && - serverGlobalParams.clusterRole != ClusterRole::ShardServer && - !repl::enableDefaultWriteConcernUpdatesForInitiate.load()) { + // Excluding reconfigs that bump the config term during step-up from checking against changing + // the implicit default write concern, as it is not needed. + if (!skipSafetyChecks /* skipping step-up reconfig */) { bool currIDWC = oldConfig.isImplicitDefaultWriteConcernMajority(); bool newIDWC = newConfig.isImplicitDefaultWriteConcernMajority(); - bool isCWWCSet = ReadWriteConcernDefaults::get(opCtx).isCWWCSet(opCtx); - if (!isCWWCSet && currIDWC != newIDWC) { - return Status( - ErrorCodes::NewReplicaSetConfigurationIncompatible, - str::stream() - << "Reconfig attempted to install a config that would change the " - "implicit default write concern. Use the setDefaultRWConcern command to " - "set a cluster-wide write concern and try the reconfig again."); + + // If the new config changes the replica set's implicit default write concern, we fail the + // reconfig command. This includes force reconfigs. + // The user should set a cluster-wide write concern and attempt the reconfig command again. + if (serverGlobalParams.clusterRole != ClusterRole::ShardServer) { + if (repl::feature_flags::gDefaultWCMajority.isEnabled( + serverGlobalParams.featureCompatibility) && + !repl::enableDefaultWriteConcernUpdatesForInitiate.load() && currIDWC != newIDWC && + !ReadWriteConcernDefaults::get(opCtx).isCWWCSet(opCtx)) { + return Status( + ErrorCodes::NewReplicaSetConfigurationIncompatible, + str::stream() + << "Reconfig attempted to install a config that would change the implicit " + "default write concern. Use the setDefaultRWConcern command to set a " + "cluster-wide write concern and try the reconfig again."); + } + } else { + // Allow all reconfigs if the shard is not part of a sharded cluster yet, however + // prevent changing the implicit default write concern to (w: 1) after it becomes part + // of a sharded cluster and CWWC is not set on the cluster. + // Remote call to the configServer should be done to check if CWWC is set on the + // cluster. + if (_externalState->isShardPartOfShardedCluster(opCtx) && currIDWC != newIDWC && + !newIDWC) { + try { + // Initiates a remote call to the config server. + if (!_externalState->isCWWCSetOnConfigShard(opCtx)) { + return Status( + ErrorCodes::NewReplicaSetConfigurationIncompatible, + str::stream() + << "Reconfig attempted to install a config that would change the " + "implicit default write concern on the shard to {w: 1}. Use the " + "setDefaultRWConcern command to set a cluster-wide write " + "concern on the cluster and try the reconfig again."); + } + } catch (const DBException& ex) { + return Status( + ErrorCodes::ConfigServerUnreachable, + str::stream() + << "Reconfig attempted to install a config that would change the " + "implicit default write concern on the shard to {w: 1}, but the " + "shard can not check if CWWC is set on the cluster, as the request " + "to the config server is failing with error: " + + ex.toString()); + } + } } } -- cgit v1.2.1