diff options
author | Suganthi Mani <suganthi.mani@mongodb.com> | 2020-05-07 08:30:13 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-05-30 03:21:52 +0000 |
commit | d00b75bfcac3ac74036ac6c2ceec4e8b42ac93a0 (patch) | |
tree | 0dadf6ebd8824e80dcec2e8ccb412c84c3affe8d /src | |
parent | 1e1af7c30b287e72773f72a9ba31d3bdfae1502a (diff) | |
download | mongo-d00b75bfcac3ac74036ac6c2ceec4e8b42ac93a0.tar.gz |
SERVER-46379 upgrade/downgrade support for initial sync semantics.
Diffstat (limited to 'src')
14 files changed, 228 insertions, 83 deletions
diff --git a/src/mongo/db/commands/feature_compatibility_version.h b/src/mongo/db/commands/feature_compatibility_version.h index fdb5d04fa15..d1058ae57e9 100644 --- a/src/mongo/db/commands/feature_compatibility_version.h +++ b/src/mongo/db/commands/feature_compatibility_version.h @@ -136,6 +136,10 @@ public: ~FixedFCVRegion() = default; + void release() { + _lk.reset(); + } + private: boost::optional<Lock::SharedLock> _lk; }; diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp index 8768bcab8a5..7f5e2a6400e 100644 --- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp +++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp @@ -254,6 +254,31 @@ public: return true; } + auto replCoord = repl::ReplicationCoordinator::get(opCtx); + const bool isReplSet = + replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet; + + uassert(ErrorCodes::ConflictingOperationInProgress, + str::stream() << "Cannot downgrade the cluster when the replica set config " + << "contains 'newlyAdded' members; wait for those members to " + << "finish its initial sync procedure", + !(isReplSet && replCoord->replSetContainsNewlyAddedMembers())); + + // We should make sure the current config w/o 'newlyAdded' members got replicated + // to all nodes. + LOGV2(4637904, "Waiting for the current replica set config to propagate to all nodes."); + // If a write concern is given, we'll use its wTimeout. It's kNoTimeout by default. 
+ WriteConcernOptions writeConcern(repl::ReplSetConfig::kConfigAllWriteConcernName, + WriteConcernOptions::SyncMode::NONE, + opCtx->getWriteConcern().wTimeout); + writeConcern.checkCondition = WriteConcernOptions::CheckCondition::Config; + repl::OpTime fakeOpTime(Timestamp(1, 1), replCoord->getTerm()); + uassertStatusOKWithContext( + replCoord->awaitReplication(opCtx, fakeOpTime, writeConcern).status, + "Failed to wait for the current replica set config to propagate to all " + "nodes"); + LOGV2(4637905, "The current replica set config has been propagated to all nodes."); + FeatureCompatibilityVersion::setTargetDowngrade(opCtx); { @@ -267,9 +292,6 @@ public: Lock::GlobalLock lk(opCtx, MODE_S); } - auto replCoord = repl::ReplicationCoordinator::get(opCtx); - const bool isReplSet = - replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet; if (failDowngrading.shouldFail()) return false; diff --git a/src/mongo/db/repl/repl_set_config.cpp b/src/mongo/db/repl/repl_set_config.cpp index 02ea0d4cb1d..a0df57d6315 100644 --- a/src/mongo/db/repl/repl_set_config.cpp +++ b/src/mongo/db/repl/repl_set_config.cpp @@ -651,6 +651,15 @@ bool ReplSetConfig::containsArbiter() const { return false; } +bool ReplSetConfig::containsNewlyAddedMembers() const { + for (MemberIterator mem = membersBegin(); mem != membersEnd(); mem++) { + if (mem->isNewlyAdded()) { + return true; + } + } + return false; +} + MutableReplSetConfig ReplSetConfig::getMutable() const { return *static_cast<const MutableReplSetConfig*>(this); } diff --git a/src/mongo/db/repl/repl_set_config.h b/src/mongo/db/repl/repl_set_config.h index a3ac31fb58b..fe69f96d5d4 100644 --- a/src/mongo/db/repl/repl_set_config.h +++ b/src/mongo/db/repl/repl_set_config.h @@ -486,6 +486,12 @@ public: bool containsArbiter() const; /** + * Returns true if this replica set has at least one member with 'newlyAdded' + * field set to true. 
+ */ + bool containsNewlyAddedMembers() const; + + /** * Returns a mutable (but not directly usable) copy of the config. */ MutableReplSetConfig getMutable() const; diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h index bde2fc6f788..a2f3d681853 100644 --- a/src/mongo/db/repl/replication_coordinator.h +++ b/src/mongo/db/repl/replication_coordinator.h @@ -941,6 +941,12 @@ public: virtual bool setContainsArbiter() const = 0; /** + * Returns true if the current replica set config has at least one member with 'newlyAdded' + * field set to true. + */ + virtual bool replSetContainsNewlyAddedMembers() const = 0; + + /** * Instructs the ReplicationCoordinator to recalculate the stable timestamp and advance it for * storage if needed. */ diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index ad687f88f2a..31ec95bd5f4 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -52,6 +52,7 @@ #include "mongo/db/catalog/commit_quorum_options.h" #include "mongo/db/client.h" #include "mongo/db/commands.h" +#include "mongo/db/commands/feature_compatibility_version.h" #include "mongo/db/commands/test_commands_enabled.h" #include "mongo/db/concurrency/d_concurrency.h" #include "mongo/db/concurrency/replication_state_transition_lock_guard.h" @@ -129,6 +130,8 @@ MONGO_FAIL_POINT_DEFINE(hangAfterReconfigOnDrainComplete); MONGO_FAIL_POINT_DEFINE(doNotRemoveNewlyAddedOnHeartbeats); // Will hang right after setting the currentOp info associated with an automatic reconfig. MONGO_FAIL_POINT_DEFINE(hangDuringAutomaticReconfig); +// Make reconfig command hang before validating new config. +MONGO_FAIL_POINT_DEFINE(ReconfigHangBeforeConfigValidationCheck); // Number of times we tried to go live as a secondary. 
Counter64 attemptsToBecomeSecondary; @@ -3169,6 +3172,19 @@ Status ReplicationCoordinatorImpl::processReplSetFreeze(int secs, BSONObjBuilder return Status::OK(); } +bool ReplicationCoordinatorImpl::_supportsAutomaticReconfig() const { + if (!enableAutomaticReconfig) { + return false; + } + + if (serverGlobalParams.featureCompatibility.getVersion() != + ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo46) { + return false; + } + + return true; +} + Status ReplicationCoordinatorImpl::processReplSetReconfig(OperationContext* opCtx, const ReplSetReconfigArgs& args, BSONObjBuilder* resultObj) { @@ -3221,60 +3237,58 @@ Status ReplicationCoordinatorImpl::processReplSetReconfig(OperationContext* opCt newConfig = ReplSetConfig(std::move(newMutableConfig)); } - if (enableAutomaticReconfig) { - boost::optional<MutableReplSetConfig> newMutableConfig; - - // Set the 'newlyAdded' field to true for all new voting nodes. - for (int i = 0; i < newConfig.getNumMembers(); i++) { - const auto newMem = newConfig.getMemberAt(i); - - // In a reconfig, the 'newlyAdded' flag should never already be set for - // this member. If it is set, throw an error. - if (newMem.isNewlyAdded()) { - str::stream errmsg; - errmsg << "Cannot provide " << MemberConfig::kNewlyAddedFieldName - << " field to member config during reconfig."; - LOGV2_ERROR( - 4634900, - "Initializing 'newlyAdded' field to member has failed with bad status.", - "errmsg"_attr = std::string(errmsg)); - return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg); - } + boost::optional<MutableReplSetConfig> newMutableConfig; + + // Set the 'newlyAdded' field to true for all new voting nodes. + for (int i = 0; i < newConfig.getNumMembers(); i++) { + const auto newMem = newConfig.getMemberAt(i); + + // In a reconfig, the 'newlyAdded' flag should never already be set for + // this member. If it is set, throw an error. 
+ if (newMem.isNewlyAdded()) { + str::stream errmsg; + errmsg << "Cannot provide " << MemberConfig::kNewlyAddedFieldName + << " field to member config during reconfig."; + LOGV2_ERROR(4634900, + "Initializing 'newlyAdded' field to member has failed with bad status.", + "errmsg"_attr = std::string(errmsg)); + return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg); + } - // We should never set the 'newlyAdded' field for arbiters, or during force - // reconfigs. - if (newMem.isArbiter() || args.force) { - continue; - } - const auto newMemId = newMem.getId(); - const auto oldMem = oldConfig.findMemberByID(newMemId.getData()); - - const bool isNewVotingMember = (oldMem == nullptr && newMem.isVoter()); - const bool isCurrentlyNewlyAdded = (oldMem != nullptr && oldMem->isNewlyAdded()); - - // Append the 'newlyAdded' field if the node: - // 1) Is a new, voting node - // 2) Already has a 'newlyAdded' field in the old config - if (isNewVotingMember || isCurrentlyNewlyAdded) { - if (!newMutableConfig) { - newMutableConfig = newConfig.getMutable(); - } - newMutableConfig->addNewlyAddedFieldForMember(newMemId); - } + // We should never set the 'newlyAdded' field for arbiters, or when automatic reconfig + // is disabled, or during force reconfigs. + if (newMem.isArbiter() || !_supportsAutomaticReconfig() || args.force) { + continue; } - if (newMutableConfig) { - newConfig = ReplSetConfig(*std::move(newMutableConfig)); - LOGV2(4634400, - "Appended the 'newlyAdded' field to a node in the new config. Nodes with " - "the 'newlyAdded' field will be considered to have 'votes:0'. 
Upon " - "transition to SECONDARY, this field will be automatically removed.", - "newConfigObj"_attr = newConfig.toBSON(), - "userProvidedConfig"_attr = args.newConfigObj, - "oldConfig"_attr = oldConfig.toBSON()); + const auto newMemId = newMem.getId(); + const auto oldMem = oldConfig.findMemberByID(newMemId.getData()); + + const bool isNewVotingMember = (oldMem == nullptr && newMem.isVoter()); + const bool isCurrentlyNewlyAdded = (oldMem != nullptr && oldMem->isNewlyAdded()); + + // Append the 'newlyAdded' field if the node: + // 1) Is a new, voting node + // 2) Already has a 'newlyAdded' field in the old config + if (isNewVotingMember || isCurrentlyNewlyAdded) { + if (!newMutableConfig) { + newMutableConfig = newConfig.getMutable(); + } + newMutableConfig->addNewlyAddedFieldForMember(newMemId); } } + if (newMutableConfig) { + newConfig = ReplSetConfig(*std::move(newMutableConfig)); + LOGV2(4634400, + "Appended the 'newlyAdded' field to a node in the new config. Nodes with " + "the 'newlyAdded' field will be considered to have 'votes:0'. Upon " + "transition to SECONDARY, this field will be automatically removed.", + "newConfigObj"_attr = newConfig.toBSON(), + "userProvidedConfig"_attr = args.newConfigObj, + "oldConfig"_attr = oldConfig.toBSON()); + } + return newConfig; }; @@ -3378,6 +3392,12 @@ Status ReplicationCoordinatorImpl::doReplSetReconfig(OperationContext* opCtx, int myIndex = _selfIndex; lk.unlock(); + // Automatic reconfig ("newlyAdded" field in repl config) is supported only from FCV4.6+. + // So, acquire FCV mutex lock in shared mode to block writers from modifying the fcv document + // to make sure fcv is not changed between getNewConfig() and storing the new config + // document locally. + FixedFCVRegion fixedFcvRegion(opCtx); + // Call the callback to get the new config given the old one. 
auto newConfigStatus = getNewConfig(oldConfig, topCoordTerm); Status status = newConfigStatus.getStatus(); @@ -3399,6 +3419,47 @@ Status ReplicationCoordinatorImpl::doReplSetReconfig(OperationContext* opCtx, return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible, validateStatus.reason()); } + // Since at this point, we have validated the new config, we are assuming the new config follows + // the safe reconfig rules. + auto needsFcvLock = [&]() -> bool { + int oldVoters = 0, newVoters = 0; + bool oldHasNewlyAdded = false, newHasNewlyAdded = false; + + std::for_each(oldConfig.membersBegin(), oldConfig.membersEnd(), [&](const MemberConfig& m) { + if (m.isVoter()) + oldVoters++; + oldHasNewlyAdded = oldHasNewlyAdded || m.isNewlyAdded(); + }); + std::for_each(newConfig.membersBegin(), newConfig.membersEnd(), [&](const MemberConfig& m) { + if (m.isVoter()) + newVoters++; + newHasNewlyAdded = newHasNewlyAdded || m.isNewlyAdded(); + }); + + // It's illegal for the new config to contain "newlyAdded" field when automatic reconfig is + // disabled. If the primary receives a new config with 'newlyAdded' field via + // replSetReconfig command, then the primary should have already uasserted earlier in + // getNewConfig(). + invariant(_supportsAutomaticReconfig() || !newHasNewlyAdded); + + return (!oldHasNewlyAdded && newHasNewlyAdded) || (newVoters > oldVoters); + }; + + // We need to take fcv lock only for 2 cases: + // 1) For fcv 4.4, addition of new voter nodes. + // 2) For fcv 4.6+, only if the current config doesn't contain the 'newlyAdded' field but the + // new config got mutated to append 'newlyAdded' field. + if (force || !needsFcvLock()) { + fixedFcvRegion.release(); + } + + if (MONGO_unlikely(ReconfigHangBeforeConfigValidationCheck.shouldFail())) { + LOGV2(4637900, + "ReconfigHangBeforeConfigValidationCheck fail point " + "enabled. 
Blocking until fail point is disabled."); + ReconfigHangBeforeConfigValidationCheck.pauseWhileSet(opCtx); + } + // Make sure we can find ourselves in the config. If the config contents have not changed, then // we bypass the check for finding ourselves in the config, since we know it should already be // satisfied. @@ -5506,6 +5567,11 @@ bool ReplicationCoordinatorImpl::setContainsArbiter() const { return _rsConfig.containsArbiter(); } +bool ReplicationCoordinatorImpl::replSetContainsNewlyAddedMembers() const { + stdx::lock_guard<Latch> lock(_mutex); + return _rsConfig.containsNewlyAddedMembers(); +} + void ReplicationCoordinatorImpl::ReadWriteAbility::setCanAcceptNonLocalWrites( WithLock lk, OperationContext* opCtx, bool canAcceptWrites) { // We must be holding the RSTL in mode X to change _canAcceptNonLocalWrites. diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index cf2e1dad7d7..6c39ff5d294 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -341,6 +341,8 @@ public: virtual bool setContainsArbiter() const override; + virtual bool replSetContainsNewlyAddedMembers() const override; + virtual void attemptToAdvanceStableTimestamp() override; virtual void finishRecoveryIfEligible(OperationContext* opCtx) override; @@ -1442,6 +1444,11 @@ private: MemberId memberId, ConfigVersionAndTerm versionAndTerm); + /** + * Checks whether replication coordinator supports automatic reconfig. + */ + bool _supportsAutomaticReconfig() const; + /* * Calculates and returns the read preference for the node. 
*/ diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp index 187d831b3e5..21a59cf9b08 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp @@ -285,38 +285,45 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse( _wakeReadyWaiters(lk); } - if (enableAutomaticReconfig) { - // When receiving a heartbeat response indicating that the remote is in a state past - // STARTUP_2, the primary will initiate a reconfig to remove the 'newlyAdded' field for that - // node (if present). This field is normally set when we add new members with votes:1 to the - // set. - if (_getMemberState_inlock().primary() && hbStatusResponse.isOK() && - hbStatusResponse.getValue().hasState()) { - auto remoteState = hbStatusResponse.getValue().getState(); - if (remoteState == MemberState::RS_SECONDARY || - remoteState == MemberState::RS_RECOVERING || - remoteState == MemberState::RS_ROLLBACK) { - const auto mem = _rsConfig.getMemberAt(targetIndex); - const auto memId = mem.getId(); - if (mem.isNewlyAdded()) { - auto status = _replExecutor->scheduleWork( - [=](const executor::TaskExecutor::CallbackArgs& cbData) { - _reconfigToRemoveNewlyAddedField( - cbData, memId, _rsConfig.getConfigVersionAndTerm()); - }); - - if (!status.isOK()) { - LOGV2_DEBUG(4634500, - 1, - "Failed to schedule work for removing 'newlyAdded' field.", - "memberId"_attr = memId.getData(), - "error"_attr = status.getStatus()); - } else { - LOGV2_DEBUG(4634501, - 1, - "Scheduled automatic reconfig to remove 'newlyAdded' field.", - "memberId"_attr = memId.getData()); - } + // When receiving a heartbeat response indicating that the remote is in a state past + // STARTUP_2, the primary will initiate a reconfig to remove the 'newlyAdded' field for that + // node (if present). This field is normally set when we add new members with votes:1 to the + // set. 
+ if (_getMemberState_inlock().primary() && hbStatusResponse.isOK() && + hbStatusResponse.getValue().hasState()) { + auto remoteState = hbStatusResponse.getValue().getState(); + if (remoteState == MemberState::RS_SECONDARY || remoteState == MemberState::RS_RECOVERING || + remoteState == MemberState::RS_ROLLBACK) { + const auto mem = _rsConfig.findMemberByHostAndPort(target); + if (mem && mem->isNewlyAdded()) { + // 'NewlyAdded' field can only exist if automatic reconfig is supported, with the + // exception of upgrading/downgrading fcv document. And, it's safe to have that + // exception because a node can't downgrade the binary version until its FCV + // document is fully downgraded. So, it's impossible for a node with downgraded + // binaries to have on-disk repl config with 'newlyAdded' fields. + invariant( + _supportsAutomaticReconfig() || + serverGlobalParams.featureCompatibility.getVersion() > + ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo44); + + const auto memId = mem->getId(); + auto status = _replExecutor->scheduleWork( + [=](const executor::TaskExecutor::CallbackArgs& cbData) { + _reconfigToRemoveNewlyAddedField( + cbData, memId, _rsConfig.getConfigVersionAndTerm()); + }); + + if (!status.isOK()) { + LOGV2_DEBUG(4634500, + 1, + "Failed to schedule work for removing 'newlyAdded' field.", + "memberId"_attr = memId.getData(), + "error"_attr = status.getStatus()); + } else { + LOGV2_DEBUG(4634501, + 1, + "Scheduled automatic reconfig to remove 'newlyAdded' field.", + "memberId"_attr = memId.getData()); } } } diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp index 8099f3d828d..7c1edd8952c 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.cpp +++ b/src/mongo/db/repl/replication_coordinator_mock.cpp @@ -558,6 +558,10 @@ bool ReplicationCoordinatorMock::setContainsArbiter() const { return false; } +bool
ReplicationCoordinatorMock::replSetContainsNewlyAddedMembers() const { + return false; +} + void ReplicationCoordinatorMock::attemptToAdvanceStableTimestamp() { return; } diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h index 7097bc2720a..5989c7903d4 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.h +++ b/src/mongo/db/repl/replication_coordinator_mock.h @@ -320,6 +320,8 @@ public: virtual bool setContainsArbiter() const override; + virtual bool replSetContainsNewlyAddedMembers() const override; + virtual void attemptToAdvanceStableTimestamp() override; virtual void finishRecoveryIfEligible(OperationContext* opCtx) override; diff --git a/src/mongo/db/repl/replication_coordinator_noop.cpp b/src/mongo/db/repl/replication_coordinator_noop.cpp index 736ef0d1f94..18af1802c8d 100644 --- a/src/mongo/db/repl/replication_coordinator_noop.cpp +++ b/src/mongo/db/repl/replication_coordinator_noop.cpp @@ -466,6 +466,10 @@ bool ReplicationCoordinatorNoOp::setContainsArbiter() const { MONGO_UNREACHABLE; } +bool ReplicationCoordinatorNoOp::replSetContainsNewlyAddedMembers() const { + MONGO_UNREACHABLE; +} + void ReplicationCoordinatorNoOp::attemptToAdvanceStableTimestamp() { MONGO_UNREACHABLE; } diff --git a/src/mongo/db/repl/replication_coordinator_noop.h b/src/mongo/db/repl/replication_coordinator_noop.h index d2fde9dcf1b..901873341f5 100644 --- a/src/mongo/db/repl/replication_coordinator_noop.h +++ b/src/mongo/db/repl/replication_coordinator_noop.h @@ -257,6 +257,8 @@ public: bool setContainsArbiter() const final; + bool replSetContainsNewlyAddedMembers() const final; + void attemptToAdvanceStableTimestamp() final; void finishRecoveryIfEligible(OperationContext* opCtx) final; diff --git a/src/mongo/embedded/replication_coordinator_embedded.cpp b/src/mongo/embedded/replication_coordinator_embedded.cpp index 8e3f2a6d023..f1643f0e0ba 100644 --- a/src/mongo/embedded/replication_coordinator_embedded.cpp +++ 
b/src/mongo/embedded/replication_coordinator_embedded.cpp @@ -493,6 +493,10 @@ bool ReplicationCoordinatorEmbedded::setContainsArbiter() const { UASSERT_NOT_IMPLEMENTED; } +bool ReplicationCoordinatorEmbedded::replSetContainsNewlyAddedMembers() const { + UASSERT_NOT_IMPLEMENTED; +} + void ReplicationCoordinatorEmbedded::attemptToAdvanceStableTimestamp() { UASSERT_NOT_IMPLEMENTED; } diff --git a/src/mongo/embedded/replication_coordinator_embedded.h b/src/mongo/embedded/replication_coordinator_embedded.h index a67f53809ee..f8bd0dbc15c 100644 --- a/src/mongo/embedded/replication_coordinator_embedded.h +++ b/src/mongo/embedded/replication_coordinator_embedded.h @@ -265,6 +265,8 @@ public: bool setContainsArbiter() const override; + bool replSetContainsNewlyAddedMembers() const override; + void attemptToAdvanceStableTimestamp() override; void finishRecoveryIfEligible(OperationContext* opCtx) override; |