diff options
author | Paolo Polato <paolo.polato@mongodb.com> | 2021-07-16 07:39:39 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-07-26 13:01:34 +0000 |
commit | a61de4f56d4f5d83e29117ebd845385f2dc356a1 (patch) | |
tree | cf2e26aa248702ad4fea3d50ad37d5a668ccc31e | |
parent | 9f1d821bb54f7192fc5192e9cf891fc709c051cb (diff) | |
download | mongo-a61de4f56d4f5d83e29117ebd845385f2dc356a1.tar.gz |
SERVER-57790 Stop the balancer thread while serving setFCV.
(cherry picked from commit a3e0e9d2998bd9a1ce0f8f975b364ef614e2d57c)
-rw-r--r-- | src/mongo/db/commands/set_feature_compatibility_version_command.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/s/balancer/balancer.cpp | 24 | ||||
-rw-r--r-- | src/mongo/db/s/balancer/balancer.h | 48 |
3 files changed, 75 insertions, 3 deletions
diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp index 2ba40c78d76..6dd6da3579d 100644 --- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp +++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp @@ -60,6 +60,7 @@ #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/repl/tenant_migration_donor_service.h" #include "mongo/db/repl/tenant_migration_recipient_service.h" +#include "mongo/db/s/balancer/balancer.h" #include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/db/s/resharding/coordinator_document_gen.h" #include "mongo/db/s/resharding/resharding_coordinator_service.h" @@ -399,10 +400,13 @@ public: boost::optional<Timestamp> changeTimestamp; if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) { + // TODO(SERVER-53283): Remove the call to requestPause() + // to allow the execution of balancer rounds during setFCV(). + auto scopedBalancerPauseRequest = Balancer::get(opCtx)->requestPause(); + // The Config Server creates a new ID (i.e., timestamp) when it receives an upgrade or // downgrade request. Alternatively, the request refers to a previously aborted // operation for which the local FCV document must contain the ID to be reused. - if (!serverGlobalParams.featureCompatibility.isUpgradingOrDowngrading()) { const auto now = VectorClock::get(opCtx)->getTime(); changeTimestamp = now.clusterTime().asTimestamp(); diff --git a/src/mongo/db/s/balancer/balancer.cpp b/src/mongo/db/s/balancer/balancer.cpp index 13c676ec04a..c91a4c766d8 100644 --- a/src/mongo/db/s/balancer/balancer.cpp +++ b/src/mongo/db/s/balancer/balancer.cpp @@ -256,6 +256,10 @@ void Balancer::joinCurrentRound(OperationContext* opCtx) { }); } +Balancer::ScopedPauseBalancerRequest Balancer::requestPause() { + return ScopedPauseBalancerRequest(this); +} + Status Balancer::rebalanceSingleChunk(OperationContext* opCtx, const ChunkType& chunk) { auto migrateStatus = _chunkSelectionPolicy->selectSpecificChunkToMove(opCtx, chunk); if (!migrateStatus.isOK()) { @@ -392,7 +396,7 @@ void Balancer::_mainThread() { continue; } - if (!balancerConfig->shouldBalance()) { + if (!balancerConfig->shouldBalance() || _stopOrPauseRequested()) { LOGV2_DEBUG(21859, 1, "Skipping balancing round because balancing is disabled"); _endRound(opCtx.get(), kBalanceRoundDefaultInterval); continue; @@ -495,11 +499,27 @@ void Balancer::_mainThread() { LOGV2(21867, "CSRS balancer is now stopped"); } +void Balancer::_addPauseRequest() { + stdx::unique_lock<Latch> scopedLock(_mutex); + ++_numPauseRequests; +} + +void Balancer::_removePauseRequest() { + stdx::unique_lock<Latch> scopedLock(_mutex); + invariant(_numPauseRequests > 0); + --_numPauseRequests; +} + bool Balancer::_stopRequested() { stdx::lock_guard<Latch> scopedLock(_mutex); return (_state != kRunning); } +bool Balancer::_stopOrPauseRequested() { + stdx::lock_guard<Latch> scopedLock(_mutex); + return (_state != kRunning || _numPauseRequests > 0); +} + void Balancer::_beginRound(OperationContext* opCtx) { stdx::unique_lock<Latch> lock(_mutex); _inBalancerRound = true; @@ -641,7 +661,7 @@ int Balancer::_moveChunks(OperationContext* opCtx, auto balancerConfig = Grid::get(opCtx)->getBalancerConfiguration(); // If the balancer was disabled since we started this round, don't start new chunk moves - if (_stopRequested() || !balancerConfig->shouldBalance()) { + if (_stopOrPauseRequested() || !balancerConfig->shouldBalance()) { LOGV2_DEBUG(21870, 1, "Skipping balancing round because balancer was stopped"); return 0; } diff --git a/src/mongo/db/s/balancer/balancer.h b/src/mongo/db/s/balancer/balancer.h index ed01a323c43..c62afc0a409 100644 --- a/src/mongo/db/s/balancer/balancer.h +++ b/src/mongo/db/s/balancer/balancer.h @@ -60,6 +60,28 @@ class Balancer : public ReplicaSetAwareServiceConfigSvr<Balancer> { public: /** + * Scoped class to manage the pause/resumeBalancer requests cycle. + * See Balancer::requestPause() for more details. + */ + class ScopedPauseBalancerRequest { + public: + ~ScopedPauseBalancerRequest() { + _balancer->_removePauseRequest(); + } + + private: + Balancer* _balancer; + + ScopedPauseBalancerRequest(Balancer* balancer) : _balancer(balancer) { + _balancer->_addPauseRequest(); + } + + ScopedPauseBalancerRequest(const ScopedPauseBalancerRequest&) = delete; + ScopedPauseBalancerRequest& operator=(const ScopedPauseBalancerRequest&) = delete; + + friend class Balancer; + }; + /** * Provide access to the Balancer decoration on ServiceContext. */ static Balancer* get(ServiceContext* serviceContext); @@ -110,6 +132,14 @@ public: */ void joinCurrentRound(OperationContext* opCtx); + + /** + * Invoked by any client requiring a temporary suspension of the balancer thread + * (I.E. the setFCV process). The request is NOT persisted by the balancer in its config + * document and remains active as long as the returned ScopedPauseRequest doesn't get destroyed. + */ + ScopedPauseBalancerRequest requestPause(); + /** * Blocking call, which requests the balancer to move a single chunk to a more appropriate * shard, in accordance with the active balancer policy. It is not guaranteed that the chunk @@ -185,6 +215,21 @@ private: bool _stopRequested(); /** + * Adds a request to pause the balancer main loop. + */ + void _addPauseRequest(); + + /** + * Removes a previously added request to pause the balancer main loop. + */ + void _removePauseRequest(); + + /** + * Assess whether the balancer has any active pause or stop request. + */ + bool _stopOrPauseRequested(); + + /** * Signals the beginning and end of a balancing round. */ void _beginRound(OperationContext* opCtx); @@ -258,6 +303,9 @@ private: // Number of moved chunks in last round int _balancedLastTime; + // Number of active pause balancer requests + int _numPauseRequests{0}; + // Source of randomness when metadata needs to be randomized. BalancerRandomSource _random; |