summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Polato <paolo.polato@mongodb.com>2021-07-16 07:39:39 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-07-26 13:01:34 +0000
commita61de4f56d4f5d83e29117ebd845385f2dc356a1 (patch)
treecf2e26aa248702ad4fea3d50ad37d5a668ccc31e
parent9f1d821bb54f7192fc5192e9cf891fc709c051cb (diff)
downloadmongo-a61de4f56d4f5d83e29117ebd845385f2dc356a1.tar.gz
SERVER-57790 Stop the balancer thread while serving setFCV.
(cherry picked from commit a3e0e9d2998bd9a1ce0f8f975b364ef614e2d57c)
-rw-r--r--src/mongo/db/commands/set_feature_compatibility_version_command.cpp6
-rw-r--r--src/mongo/db/s/balancer/balancer.cpp24
-rw-r--r--src/mongo/db/s/balancer/balancer.h48
3 files changed, 75 insertions, 3 deletions
diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
index 2ba40c78d76..6dd6da3579d 100644
--- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
+++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
@@ -60,6 +60,7 @@
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/tenant_migration_donor_service.h"
#include "mongo/db/repl/tenant_migration_recipient_service.h"
+#include "mongo/db/s/balancer/balancer.h"
#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/db/s/resharding/coordinator_document_gen.h"
#include "mongo/db/s/resharding/resharding_coordinator_service.h"
@@ -399,10 +400,13 @@ public:
boost::optional<Timestamp> changeTimestamp;
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
+ // TODO(SERVER-53283): Remove the call to requestPause()
+ // to allow the execution of balancer rounds during setFCV().
+ auto scopedBalancerPauseRequest = Balancer::get(opCtx)->requestPause();
+
// The Config Server creates a new ID (i.e., timestamp) when it receives an upgrade or
// downgrade request. Alternatively, the request refers to a previously aborted
// operation for which the local FCV document must contain the ID to be reused.
-
if (!serverGlobalParams.featureCompatibility.isUpgradingOrDowngrading()) {
const auto now = VectorClock::get(opCtx)->getTime();
changeTimestamp = now.clusterTime().asTimestamp();
diff --git a/src/mongo/db/s/balancer/balancer.cpp b/src/mongo/db/s/balancer/balancer.cpp
index 13c676ec04a..c91a4c766d8 100644
--- a/src/mongo/db/s/balancer/balancer.cpp
+++ b/src/mongo/db/s/balancer/balancer.cpp
@@ -256,6 +256,10 @@ void Balancer::joinCurrentRound(OperationContext* opCtx) {
});
}
+Balancer::ScopedPauseBalancerRequest Balancer::requestPause() {
+ return ScopedPauseBalancerRequest(this);  // ctor adds a pause request; dtor removes it
+}
+
Status Balancer::rebalanceSingleChunk(OperationContext* opCtx, const ChunkType& chunk) {
auto migrateStatus = _chunkSelectionPolicy->selectSpecificChunkToMove(opCtx, chunk);
if (!migrateStatus.isOK()) {
@@ -392,7 +396,7 @@ void Balancer::_mainThread() {
continue;
}
- if (!balancerConfig->shouldBalance()) {
+ if (!balancerConfig->shouldBalance() || _stopOrPauseRequested()) {
LOGV2_DEBUG(21859, 1, "Skipping balancing round because balancing is disabled");
_endRound(opCtx.get(), kBalanceRoundDefaultInterval);
continue;
@@ -495,11 +499,27 @@ void Balancer::_mainThread() {
LOGV2(21867, "CSRS balancer is now stopped");
}
+void Balancer::_addPauseRequest() {
+ stdx::unique_lock<Latch> scopedLock(_mutex);  // hold _mutex while updating the counter
+ ++_numPauseRequests;  // one more active pause request
+}
+
+void Balancer::_removePauseRequest() {
+ stdx::unique_lock<Latch> scopedLock(_mutex);  // hold _mutex while updating the counter
+ invariant(_numPauseRequests > 0);  // a removal must match a previously added request
+ --_numPauseRequests;
+}
+
bool Balancer::_stopRequested() {
stdx::lock_guard<Latch> scopedLock(_mutex);  // _state is read under _mutex
return (_state != kRunning);  // true for any state other than kRunning
}
+bool Balancer::_stopOrPauseRequested() {
+ stdx::lock_guard<Latch> scopedLock(_mutex);  // _state and _numPauseRequests are read under _mutex
+ return (_state != kRunning || _numPauseRequests > 0);  // not running, or at least one active pause request
+}
+
void Balancer::_beginRound(OperationContext* opCtx) {
stdx::unique_lock<Latch> lock(_mutex);
_inBalancerRound = true;
@@ -641,7 +661,7 @@ int Balancer::_moveChunks(OperationContext* opCtx,
auto balancerConfig = Grid::get(opCtx)->getBalancerConfiguration();
// If the balancer was disabled since we started this round, don't start new chunk moves
- if (_stopRequested() || !balancerConfig->shouldBalance()) {
+ if (_stopOrPauseRequested() || !balancerConfig->shouldBalance()) {
LOGV2_DEBUG(21870, 1, "Skipping balancing round because balancer was stopped");
return 0;
}
diff --git a/src/mongo/db/s/balancer/balancer.h b/src/mongo/db/s/balancer/balancer.h
index ed01a323c43..c62afc0a409 100644
--- a/src/mongo/db/s/balancer/balancer.h
+++ b/src/mongo/db/s/balancer/balancer.h
@@ -60,6 +60,28 @@ class Balancer : public ReplicaSetAwareServiceConfigSvr<Balancer> {
public:
/**
+ * Scoped class to manage the pause/resume request cycle of the balancer: the constructor
+ * adds a pause request to the given Balancer and the destructor removes it again.
+ * Instances are non-copyable and, since the constructor is private, can only be created by
+ * the Balancer class (declared as a friend).
+ * See Balancer::requestPause() for more details.
+ */
+ class ScopedPauseBalancerRequest {
+ public:
+ ~ScopedPauseBalancerRequest() {
+ _balancer->_removePauseRequest();  // undo the request added by the constructor
+ }
+
+ private:
+ Balancer* _balancer;  // balancer on which the pause request was added
+
+ ScopedPauseBalancerRequest(Balancer* balancer) : _balancer(balancer) {
+ _balancer->_addPauseRequest();  // the request stays active until this object is destroyed
+ }
+
+ ScopedPauseBalancerRequest(const ScopedPauseBalancerRequest&) = delete;
+ ScopedPauseBalancerRequest& operator=(const ScopedPauseBalancerRequest&) = delete;
+
+ friend class Balancer;  // grants Balancer access to the private constructor
+ };
+ /**
* Provide access to the Balancer decoration on ServiceContext.
*/
static Balancer* get(ServiceContext* serviceContext);
@@ -110,6 +132,14 @@ public:
*/
void joinCurrentRound(OperationContext* opCtx);
+
+ /**
+ * Invoked by any client requiring a temporary suspension of the balancer thread
+ * (e.g. the setFCV process). The request is NOT persisted by the balancer in its config
+ * document and remains active until the returned ScopedPauseBalancerRequest is destroyed.
+ */
+ ScopedPauseBalancerRequest requestPause();
+
/**
* Blocking call, which requests the balancer to move a single chunk to a more appropriate
* shard, in accordance with the active balancer policy. It is not guaranteed that the chunk
@@ -185,6 +215,21 @@ private:
bool _stopRequested();
/**
+ * Adds a request to pause the balancer main loop by incrementing the count of active pause
+ * requests while holding the balancer mutex.
+ */
+ void _addPauseRequest();
+
+ /**
+ * Removes a previously added request to pause the balancer main loop. Invariants that at
+ * least one pause request is currently active.
+ */
+ void _removePauseRequest();
+
+ /**
+ * Returns true if the balancer is not in the running state or has at least one active
+ * pause request.
+ */
+ bool _stopOrPauseRequested();
+
+ /**
* Signals the beginning and end of a balancing round.
*/
void _beginRound(OperationContext* opCtx);
@@ -258,6 +303,9 @@ private:
// Number of moved chunks in last round
int _balancedLastTime;
+ // Number of active pause balancer requests
+ int _numPauseRequests{0};
+
// Source of randomness when metadata needs to be randomized.
BalancerRandomSource _random;