diff options
author | Matt Broadstone <mbroadst@mongodb.com> | 2022-08-29 18:26:32 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-29 19:12:05 +0000 |
commit | 382f12ccb49dac4ac5435b35543521f96e003207 (patch) | |
tree | a167a0e93282905dea22eb8e3536ca11dd589050 /src/mongo/db/repl | |
parent | a8ff73d03f7b21764479524f28c5bbae857fdc6b (diff) | |
download | mongo-382f12ccb49dac4ac5435b35543521f96e003207.tar.gz |
SERVER-69197 Clear lastCommitted and currentCommitted on split
Diffstat (limited to 'src/mongo/db/repl')
-rw-r--r-- | src/mongo/db/repl/repl_set_config.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/repl/repl_set_config.h | 11 | ||||
-rw-r--r-- | src/mongo/db/repl/repl_set_config.idl | 3 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.h | 9 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp | 66 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp | 11 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_mock.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator.h | 5 |
10 files changed, 50 insertions, 78 deletions
diff --git a/src/mongo/db/repl/repl_set_config.cpp b/src/mongo/db/repl/repl_set_config.cpp index 4565587f3c3..46f96bbbb59 100644 --- a/src/mongo/db/repl/repl_set_config.cpp +++ b/src/mongo/db/repl/repl_set_config.cpp @@ -791,14 +791,6 @@ bool ReplSetConfig::areWriteConcernModesTheSame(ReplSetConfig* otherConfig) cons return true; } -boost::optional<OpTime> ReplSetConfig::getShardSplitBlockOpTime() const { - return getSettings()->getShardSplitBlockOpTime(); -} - -void MutableReplSetConfig::removeShardSplitBlockOpTime() { - getSettings()->setShardSplitBlockOpTime(boost::none); -} - MemberConfig* MutableReplSetConfig::_findMemberByID(MemberId id) { for (auto it = getMembers().begin(); it != getMembers().end(); ++it) { if (it->getId() == id) { diff --git a/src/mongo/db/repl/repl_set_config.h b/src/mongo/db/repl/repl_set_config.h index c8d3943e08d..b6e34afdc19 100644 --- a/src/mongo/db/repl/repl_set_config.h +++ b/src/mongo/db/repl/repl_set_config.h @@ -139,11 +139,6 @@ public: */ void setSecondaryDelaySecsFieldDefault(MemberId memberId); - /** - * Removes the opTime field stored for an in-progress shard split operation. - */ - void removeShardSplitBlockOpTime(); - protected: MutableReplSetConfig() = default; @@ -565,12 +560,6 @@ public: */ bool areWriteConcernModesTheSame(ReplSetConfig* otherConfig) const; - /** - * Returns the opTime when an in-progress split operation started blocking requests, if one is - * currently running. - */ - boost::optional<OpTime> getShardSplitBlockOpTime() const; - private: /** * Sets replica set ID to 'defaultReplicaSetId' if 'cfg' does not contain an ID. diff --git a/src/mongo/db/repl/repl_set_config.idl b/src/mongo/db/repl/repl_set_config.idl index 9df1a636444..47b94a347a0 100644 --- a/src/mongo/db/repl/repl_set_config.idl +++ b/src/mongo/db/repl/repl_set_config.idl @@ -129,9 +129,6 @@ structs: type: objectid optional: true validator: { callback: "validateReplicaSetIdNotNull"} - shardSplitBlockOpTime: - type: optime - optional: true ReplSetConfigBase: description: "The complete configuration for the replica set" diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index ddc80433aa7..4a1ba95f347 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -1360,10 +1360,11 @@ private: void _scheduleHeartbeatReconfig(WithLock lk, const ReplSetConfig& newConfig); /** - * Determines if the provided config is a split config, and validates it for installation. + * Accepts a ReplSetConfig and resolves it either to itself, or the embedded shard split + * recipient config if it's present and self is a shard split recipient. Returns a tuple of the + * resolved config and a boolean indicating whether a recipient config was found. */ - std::tuple<StatusWith<ReplSetConfig>, boost::optional<OpTime>> _resolveConfigToApply( - const ReplSetConfig& config); + std::tuple<StatusWith<ReplSetConfig>, bool> _resolveConfigToApply(const ReplSetConfig& config); /** * Method to write a configuration transmitted via heartbeat message to stable storage. @@ -1377,7 +1378,7 @@ private: void _heartbeatReconfigFinish(const executor::TaskExecutor::CallbackArgs& cbData, const ReplSetConfig& newConfig, StatusWith<int> myIndex, - boost::optional<OpTime> shardSplitBlockOpTime); + bool isSplitRecipientConfig); /** * Calculates the time (in millis) left in quiesce mode and converts the value to int64. diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp index 681baa5e13f..057d101a6ff 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp @@ -664,10 +664,10 @@ void ReplicationCoordinatorImpl::_scheduleHeartbeatReconfig(WithLock lk, .status_with_transitional_ignore(); } -std::tuple<StatusWith<ReplSetConfig>, boost::optional<OpTime>> -ReplicationCoordinatorImpl::_resolveConfigToApply(const ReplSetConfig& config) { +std::tuple<StatusWith<ReplSetConfig>, bool> ReplicationCoordinatorImpl::_resolveConfigToApply( + const ReplSetConfig& config) { if (!_settings.isServerless() || !config.isSplitConfig()) { - return {config, boost::none}; + return {config, false}; } stdx::unique_lock<Latch> lk(_mutex); @@ -684,12 +684,12 @@ ReplicationCoordinatorImpl::_resolveConfigToApply(const ReplSetConfig& config) { }); if (foundSelfInMembers) { - return {config, boost::none}; + return {config, false}; } return {Status(ErrorCodes::NotYetInitialized, "Cannot apply a split config if the current config is uninitialized"), - boost::none}; + false}; } auto recipientConfig = config.getRecipientConfig(); @@ -697,25 +697,21 @@ ReplicationCoordinatorImpl::_resolveConfigToApply(const ReplSetConfig& config) { if (recipientConfig->findMemberByHostAndPort(selfMember.getHostAndPort())) { if (selfMember.getNumVotes() > 0) { return {Status(ErrorCodes::BadValue, "Cannot apply recipient config to a voting node"), - boost::none}; + false}; } if (_rsConfig.getReplSetName() == recipientConfig->getReplSetName()) { return {Status(ErrorCodes::InvalidReplicaSetConfig, "Cannot apply recipient config since current config and recipient " "config have the same set name."), - boost::none}; + false}; } - invariant(recipientConfig->getShardSplitBlockOpTime()); - auto shardSplitBlockOpTime = *recipientConfig->getShardSplitBlockOpTime(); - auto mutableConfig = recipientConfig->getMutable(); - mutableConfig.removeShardSplitBlockOpTime(); - return {ReplSetConfig(std::move(mutableConfig)), shardSplitBlockOpTime}; + return {ReplSetConfig(*recipientConfig), true}; } - return {config, boost::none}; + return {config, false}; } void ReplicationCoordinatorImpl::_heartbeatReconfigStore( @@ -731,7 +727,7 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigStore( return; } - const auto [swConfig, shardSplitBlockOpTime] = _resolveConfigToApply(newConfig); + const auto [swConfig, isSplitRecipientConfig] = _resolveConfigToApply(newConfig); if (!swConfig.isOK()) { LOGV2_WARNING(6234600, "Ignoring new configuration in heartbeat response because it is invalid", @@ -744,7 +740,7 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigStore( } const auto configToApply = swConfig.getValue(); - if (shardSplitBlockOpTime) { + if (isSplitRecipientConfig) { LOGV2(6309200, "Applying a recipient config for a shard split operation.", "config"_attr = configToApply); @@ -804,15 +800,17 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigStore( configToApply.getConfigVersionAndTerm()); auto opCtx = cc().makeOperationContext(); - // Don't write the no-op for config learned via heartbeats. - auto status = [&, isRecipientConfig = shardSplitBlockOpTime.has_value()]() { - if (isRecipientConfig) { - return _externalState->replaceLocalConfigDocument(opCtx.get(), - configToApply.toBSON()); - } else { - return _externalState->storeLocalConfigDocument( - opCtx.get(), configToApply.toBSON(), false /* writeOplog */); + auto status = [this, + opCtx = opCtx.get(), + configToApply, + isSplitRecipientConfig = isSplitRecipientConfig]() { + if (isSplitRecipientConfig) { + return _externalState->replaceLocalConfigDocument(opCtx, configToApply.toBSON()); } + + // Don't write the no-op for config learned via heartbeats. + return _externalState->storeLocalConfigDocument( + opCtx, configToApply.toBSON(), false /* writeOplog */); }(); // Wait for durability of the new config document. @@ -859,7 +857,7 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigStore( shouldStartDataReplication = true; } - if (shardSplitBlockOpTime) { + if (isSplitRecipientConfig) { // Donor access blockers are removed from donor nodes via the shard split op observer. // Donor access blockers are removed from recipient nodes when the node applies the // recipient config. When the recipient primary steps up it will delete its state @@ -879,7 +877,7 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigStore( "configToApplyVersionAndTerm"_attr = configToApply.getConfigVersionAndTerm()); } - _heartbeatReconfigFinish(cbd, configToApply, myIndex, shardSplitBlockOpTime); + _heartbeatReconfigFinish(cbd, configToApply, myIndex, isSplitRecipientConfig); // Start data replication after the config has been installed. if (shouldStartDataReplication) { @@ -910,7 +908,7 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigFinish( const executor::TaskExecutor::CallbackArgs& cbData, const ReplSetConfig& newConfig, StatusWith<int> myIndex, - boost::optional<OpTime> shardSplitBlockOpTime) { + bool isSplitRecipientConfig) { if (cbData.status == ErrorCodes::CallbackCanceled) { return; } @@ -924,7 +922,7 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigFinish( ->scheduleWorkAt(_replExecutor->now() + Milliseconds{10}, [=](const executor::TaskExecutor::CallbackArgs& cbData) { _heartbeatReconfigFinish( - cbData, newConfig, myIndex, shardSplitBlockOpTime); + cbData, newConfig, myIndex, isSplitRecipientConfig); }) .status_with_transitional_ignore(); return; @@ -949,7 +947,7 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigFinish( ->onEvent(electionFinishedEvent, [=](const executor::TaskExecutor::CallbackArgs& cbData) { _heartbeatReconfigFinish( - cbData, newConfig, myIndex, shardSplitBlockOpTime); + cbData, newConfig, myIndex, isSplitRecipientConfig); }) .status_with_transitional_ignore(); return; @@ -1002,7 +1000,7 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigFinish( invariant(_rsConfigState == kConfigHBReconfiguring); invariant(!_rsConfig.isInitialized() || _rsConfig.getConfigVersionAndTerm() < newConfig.getConfigVersionAndTerm() || - _selfIndex < 0 || shardSplitBlockOpTime); + _selfIndex < 0 || isSplitRecipientConfig); if (!myIndex.isOK()) { switch (myIndex.getStatus().code()) { @@ -1040,8 +1038,14 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigFinish( const PostMemberStateUpdateAction action = _setCurrentRSConfig(lk, opCtx.get(), newConfig, myIndexValue); - if (shardSplitBlockOpTime) { - _topCoord->resetLastCommittedOpTime(*shardSplitBlockOpTime); + if (isSplitRecipientConfig) { + LOGV2(8423364, + "Clearing the commit point and current committed snapshot after applying split " + "recipient config."); + // Clear lastCommittedOpTime by passing in a default constructed OpTimeAndWallTime, and + // indicating that this is `forInitiate`. + _topCoord->advanceLastCommittedOpTimeAndWallTime(OpTimeAndWallTime(), false, true); + _clearCommittedSnapshot_inlock(); } lk.unlock(); diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp index 23e1d594488..62e2c412cc6 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp @@ -705,8 +705,7 @@ public: // Add the raw config object. auto conf = ReplSetConfig::parse(makeConfigObj(configVersion, termVersion)); - auto splitConf = serverless::makeSplitConfig( - conf, _recipientSetName, _recipientTag, repl::OpTime(Timestamp(12345, 1), termVersion)); + auto splitConf = serverless::makeSplitConfig(conf, _recipientSetName, _recipientTag); // makeSplitConf increment the config version. We don't want that here as it makes the unit // test case harder to follow. @@ -820,10 +819,10 @@ TEST_F(ReplCoordHBV1SplitConfigTest, RecipientNodeApplyConfig) { getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(responseObj)); getNet()->runReadyNetworkOperations(); - // The recipient's lastCommittedOpTime is reset to the blockOpTime on applying the recipient - // config. - ASSERT_EQ(getReplCoord()->getLastCommittedOpTime(), - repl::OpTime(Timestamp(12345, 1), getReplCoord()->getConfigTerm())); + // The recipient's lastCommittedOpTime and currentCommittedSnapshotOpTime are cleared on + // applying the recipient config. + ASSERT(getReplCoord()->getLastCommittedOpTime().isNull()); + ASSERT(getReplCoord()->getCurrentCommittedSnapshotOpTime().isNull()); // Applying the recipient config will increase the configVersion by 1. validateNextRequest( diff --git a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp index 3c61693428e..003e70c0ff7 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp @@ -1391,10 +1391,8 @@ TEST_F(ReplCoordReconfigTest, MustSendHeartbeatToSplitConfigRecipients) { BSONObjBuilder result; const auto opCtx = makeOperationContext(); - auto newConfig = mongo::serverless::makeSplitConfig(ReplSetConfig::parse(oldConfigObj), - "recipientSet", - recipientTagName, - repl::OpTime(Timestamp(100, 0), 1)); + auto newConfig = mongo::serverless::makeSplitConfig( + ReplSetConfig::parse(oldConfigObj), "recipientSet", recipientTagName); Status status(ErrorCodes::InternalError, "Not Set"); stdx::thread reconfigThread; diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp index 691c7411e9e..88b4b07d20d 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.cpp +++ b/src/mongo/db/repl/replication_coordinator_mock.cpp @@ -171,7 +171,9 @@ void ReplicationCoordinatorMock::setAwaitReplicationReturnValueFunction( SharedSemiFuture<void> ReplicationCoordinatorMock::awaitReplicationAsyncNoWTimeout( const OpTime& opTime, const WriteConcernOptions& writeConcern) { - MONGO_UNREACHABLE; + auto opCtx = cc().makeOperationContext(); + auto result = _awaitReplicationReturnValueFunction(opCtx.get(), opTime); + return Future<ReplicationCoordinator::StatusAndDuration>::makeReady(result).ignoreValue(); } void ReplicationCoordinatorMock::stepDown(OperationContext* opCtx, diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp index b112eab9af0..030ad0a2792 100644 --- a/src/mongo/db/repl/topology_coordinator.cpp +++ b/src/mongo/db/repl/topology_coordinator.cpp @@ -2965,11 +2965,6 @@ bool TopologyCoordinator::advanceLastCommittedOpTimeAndWallTime(OpTimeAndWallTim return true; } -void TopologyCoordinator::resetLastCommittedOpTime(const OpTime& lastCommittedOpTime) { - LOGV2(8423364, "Resetting commit point", "lastCommittedOpTime"_attr = lastCommittedOpTime); - _lastCommittedOpTimeAndWallTime = OpTimeAndWallTime(lastCommittedOpTime, Date_t::now()); -} - OpTime TopologyCoordinator::getLastCommittedOpTime() const { return _lastCommittedOpTimeAndWallTime.opTime; } diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h index 6c2231d3904..3285a5b4825 100644 --- a/src/mongo/db/repl/topology_coordinator.h +++ b/src/mongo/db/repl/topology_coordinator.h @@ -322,11 +322,6 @@ public: bool forInitiate = false); /** - * Resets the commit point to the provided opTime, with a wall time of now. - */ - void resetLastCommittedOpTime(const OpTime& lastCommittedOpTime); - - /** * Returns the OpTime of the latest majority-committed op known to this server. */ OpTime getLastCommittedOpTime() const; |