From 1cc097075cc9531cf24a43ed249012d719b2b4fe Mon Sep 17 00:00:00 2001 From: Paolo Polato Date: Wed, 28 Dec 2022 17:28:30 +0000 Subject: SERVER-71771 Defragmenter should cap chunk size estimation (cherry picked from commit d0d9e95c07df276f4fa3cef07982803d689da5c7) --- src/mongo/db/s/balancer/balancer.cpp | 3 +- .../db/s/balancer/balancer_commands_scheduler.h | 3 +- .../balancer/balancer_commands_scheduler_impl.cpp | 8 +++-- .../s/balancer/balancer_commands_scheduler_impl.h | 10 ++++-- .../balancer/balancer_commands_scheduler_test.cpp | 16 ++++++---- .../balancer_defragmentation_policy_impl.cpp | 37 +++++++++++++++------- .../balancer_defragmentation_policy_impl.h | 3 ++ .../balancer_defragmentation_policy_test.cpp | 31 ++++++++++++++++-- src/mongo/db/s/balancer/balancer_policy.cpp | 6 ++-- src/mongo/db/s/balancer/balancer_policy.h | 9 ++++-- 10 files changed, 95 insertions(+), 31 deletions(-) diff --git a/src/mongo/db/s/balancer/balancer.cpp b/src/mongo/db/s/balancer/balancer.cpp index a9b4aa0cb6f..4a613199e2e 100644 --- a/src/mongo/db/s/balancer/balancer.cpp +++ b/src/mongo/db/s/balancer/balancer.cpp @@ -584,7 +584,8 @@ void Balancer::_consumeActionStreamLoop() { dataSizeAction.chunkRange, dataSizeAction.version, dataSizeAction.keyPattern, - dataSizeAction.estimatedValue) + dataSizeAction.estimatedValue, + dataSizeAction.maxSize) .thenRunOn(*executor) .onCompletion([this, selectedStream, diff --git a/src/mongo/db/s/balancer/balancer_commands_scheduler.h b/src/mongo/db/s/balancer/balancer_commands_scheduler.h index a75e18e944e..2a16209a2c0 100644 --- a/src/mongo/db/s/balancer/balancer_commands_scheduler.h +++ b/src/mongo/db/s/balancer/balancer_commands_scheduler.h @@ -148,7 +148,8 @@ public: const ChunkRange& chunkRange, const ChunkVersion& version, const KeyPattern& keyPattern, - bool estimatedValue) = 0; + bool estimatedValue, + int64_t maxSize) = 0; virtual SemiFuture requestMoveRange(OperationContext* opCtx, const ShardsvrMoveRange& request, diff --git 
a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp index 955ede552ef..ca15b3b420f 100644 --- a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp +++ b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp @@ -184,6 +184,7 @@ const std::string DataSizeCommandInfo::kKeyPattern = "keyPattern"; const std::string DataSizeCommandInfo::kMinValue = "min"; const std::string DataSizeCommandInfo::kMaxValue = "max"; const std::string DataSizeCommandInfo::kEstimatedValue = "estimate"; +const std::string DataSizeCommandInfo::kMaxSizeValue = "maxSize"; const std::string SplitChunkCommandInfo::kCommandName = "splitChunk"; const std::string SplitChunkCommandInfo::kShardName = "from"; @@ -359,13 +360,15 @@ SemiFuture BalancerCommandsSchedulerImpl::requestDataSize( const ChunkRange& chunkRange, const ChunkVersion& version, const KeyPattern& keyPattern, - bool estimatedValue) { + bool estimatedValue, + int64_t maxSize) { auto commandInfo = std::make_shared(nss, shardId, keyPattern.toBSON(), chunkRange.getMin(), chunkRange.getMax(), estimatedValue, + maxSize, version); return _buildAndEnqueueNewRequest(opCtx, std::move(commandInfo)) @@ -377,7 +380,8 @@ SemiFuture BalancerCommandsSchedulerImpl::requestDataSize( } long long sizeBytes = remoteResponse.data["size"].number(); long long numObjects = remoteResponse.data["numObjects"].number(); - return DataSizeResponse(sizeBytes, numObjects); + bool maxSizeReached = remoteResponse.data["maxReached"].trueValue(); + return DataSizeResponse(sizeBytes, numObjects, maxSizeReached); }) .semi(); } diff --git a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.h b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.h index 92f9f074441..5803a880029 100644 --- a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.h +++ b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.h @@ -336,12 +336,14 @@ public: const BSONObj& lowerBoundKey, const 
BSONObj& upperBoundKey, bool estimatedValue, + int64_t maxSize, const ChunkVersion& version) : CommandInfo(shardId, nss, boost::none), _shardKeyPattern(shardKeyPattern), _lowerBoundKey(lowerBoundKey), _upperBoundKey(upperBoundKey), _estimatedValue(estimatedValue), + _maxSize(maxSize), _version(version) {} BSONObj serialise() const override { @@ -350,7 +352,8 @@ public: .append(kKeyPattern, _shardKeyPattern) .append(kMinValue, _lowerBoundKey) .append(kMaxValue, _upperBoundKey) - .append(kEstimatedValue, _estimatedValue); + .append(kEstimatedValue, _estimatedValue) + .append(kMaxSizeValue, _maxSize); _version.serializeToBSON(ChunkVersion::kShardVersionField, &commandBuilder); @@ -362,6 +365,7 @@ private: BSONObj _lowerBoundKey; BSONObj _upperBoundKey; bool _estimatedValue; + int64_t _maxSize; ChunkVersion _version; static const std::string kCommandName; @@ -369,6 +373,7 @@ private: static const std::string kMinValue; static const std::string kMaxValue; static const std::string kEstimatedValue; + static const std::string kMaxSizeValue; }; class SplitChunkCommandInfo : public CommandInfo { @@ -589,7 +594,8 @@ public: const ChunkRange& chunkRange, const ChunkVersion& version, const KeyPattern& keyPattern, - bool estimatedValue) override; + bool estimatedValue, + int64_t maxSize) override; private: enum class SchedulerState { Recovering, Running, Stopping, Stopped }; diff --git a/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp b/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp index 678e5f63f9f..fdf01b9eebd 100644 --- a/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp +++ b/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp @@ -319,13 +319,15 @@ TEST_F(BalancerCommandsSchedulerTest, SuccessfulRequestChunkDataSizeCommand) { _scheduler.start(operationContext(), getMigrationRecoveryDefaultValues()); ChunkType chunk = makeChunk(0, kShardId0); - auto futureResponse = _scheduler.requestDataSize(operationContext(), - kNss, - 
chunk.getShard(), - chunk.getRange(), - chunk.getVersion(), - KeyPattern(BSON("x" << 1)), - false /* issuedByRemoteUser */); + auto futureResponse = + _scheduler.requestDataSize(operationContext(), + kNss, + chunk.getShard(), + chunk.getRange(), + chunk.getVersion(), + KeyPattern(BSON("x" << 1)), + false /* issuedByRemoteUser */, + (kDefaultMaxChunkSizeBytes / 100) * 25 /* maxSize */); auto swReceivedDataSize = futureResponse.getNoThrow(); ASSERT_OK(swReceivedDataSize.getStatus()); auto receivedDataSize = swReceivedDataSize.getValue(); diff --git a/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.cpp b/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.cpp index d52421edc9e..99cb2498ad6 100644 --- a/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.cpp +++ b/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.cpp @@ -166,6 +166,11 @@ public: auto collectionChunks = getCollectionChunks(opCtx, coll); const auto collectionZones = getCollectionZones(opCtx, coll); + // Calculate small chunk threshold to limit dataSize commands + const auto maxChunkSizeBytes = getCollectionMaxChunkSizeBytes(opCtx, coll); + const int64_t smallChunkSizeThreshold = + (maxChunkSizeBytes / 100) * kSmallChunkSizeThresholdPctg; + stdx::unordered_map pendingActionsByShards; // Find ranges of chunks; for single-chunk ranges, request DataSize; for multi-range, issue // merge @@ -192,6 +197,7 @@ public: new MergeAndMeasureChunksPhase(coll.getNss(), coll.getUuid(), coll.getKeyPattern().toBSON(), + smallChunkSizeThreshold, std::move(pendingActionsByShards))); } @@ -217,14 +223,15 @@ public: if (pendingActions.rangesWithoutDataSize.size() > pendingActions.rangesToMerge.size()) { const auto& rangeToMeasure = pendingActions.rangesWithoutDataSize.back(); - nextAction = - boost::optional(DataSizeInfo(shardId, - _nss, - _uuid, - rangeToMeasure, - shardVersion, - _shardKey, - true /* estimate */)); + nextAction = boost::optional( + DataSizeInfo(shardId, + _nss, + 
_uuid, + rangeToMeasure, + shardVersion, + _shardKey, + true /* estimate */, + _smallChunkSizeThresholdBytes /* maxSize */)); pendingActions.rangesWithoutDataSize.pop_back(); } else if (!pendingActions.rangesToMerge.empty()) { const auto& rangeToMerge = pendingActions.rangesToMerge.back(); @@ -297,10 +304,17 @@ public: dataSizeAction.version, dataSizeAction.shardId); auto catalogManager = ShardingCatalogManager::get(opCtx); + // Max out the chunk size if it has been estimated as + // bigger than _smallChunkSizeThresholdBytes; this will exclude + // the chunk from the list of candidates considered by + // MoveAndMergeChunksPhase + auto estimatedSize = dataSizeResponse.getValue().maxSizeReached + ? std::numeric_limits::max() + : dataSizeResponse.getValue().sizeBytes; catalogManager->setChunkEstimatedSize( opCtx, chunk, - dataSizeResponse.getValue().sizeBytes, + estimatedSize, ShardingCatalogClient::kMajorityWriteConcern); }, [&]() { @@ -353,10 +367,12 @@ private: const NamespaceString& nss, const UUID& uuid, const BSONObj& shardKey, + const int64_t smallChunkSizeThresholdBytes, stdx::unordered_map&& pendingActionsByShards) : _nss(nss), _uuid(uuid), _shardKey(shardKey), + _smallChunkSizeThresholdBytes(smallChunkSizeThresholdBytes), _pendingActionsByShards(std::move(pendingActionsByShards)) {} void _abort(const DefragmentationPhaseEnum nextPhase) { @@ -368,6 +384,7 @@ private: const NamespaceString _nss; const UUID _uuid; const BSONObj _shardKey; + const int64_t _smallChunkSizeThresholdBytes; stdx::unordered_map _pendingActionsByShards; boost::optional _shardToProcess; size_t _outstandingActions{0}; @@ -768,8 +785,6 @@ private: bool _isChunkToMergeLeftSibling; }; - static constexpr uint64_t kSmallChunkSizeThresholdPctg = 25; - const NamespaceString _nss; const UUID _uuid; diff --git a/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.h b/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.h index c5174d9944b..12a8b90c7d9 100644 --- 
a/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.h +++ b/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.h @@ -64,6 +64,9 @@ public: virtual bool isComplete() const = 0; virtual void userAbort() = 0; + +protected: + static constexpr uint64_t kSmallChunkSizeThresholdPctg = 25; }; class BalancerDefragmentationPolicyImpl : public BalancerDefragmentationPolicy { diff --git a/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp b/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp index c5b71710c24..068df07cdc8 100644 --- a/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp +++ b/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp @@ -270,7 +270,7 @@ TEST_F(BalancerDefragmentationPolicyTest, ASSERT_TRUE(nextAction.is_initialized()); DataSizeInfo dataSizeAction = stdx::get(*nextAction); - auto resp = StatusWith(DataSizeResponse(2000, 4)); + auto resp = StatusWith(DataSizeResponse(2000, 4, false)); _defragmentationPolicy.applyActionResult(operationContext(), dataSizeAction, resp); // 1. The outcome of the data size has been stored in the expected document... @@ -287,6 +287,33 @@ TEST_F(BalancerDefragmentationPolicyTest, verifyExpectedDefragmentationPhaseOndisk(DefragmentationPhaseEnum::kMoveAndMergeChunks); } +TEST_F(BalancerDefragmentationPolicyTest, + TestPhaseOneDataSizeResponsesWithMaxSizeReachedCausesChunkToBeSkippedByPhaseTwo) { + auto coll = setupCollectionWithPhase({makeConfigChunkEntry()}); + setDefaultClusterStats(); + _defragmentationPolicy.startCollectionDefragmentation(operationContext(), coll); + auto nextAction = _defragmentationPolicy.getNextStreamingAction(operationContext()); + ASSERT_TRUE(nextAction.has_value()); + DataSizeInfo dataSizeAction = stdx::get(*nextAction); + + auto resp = StatusWith(DataSizeResponse(2000, 4, true)); + _defragmentationPolicy.applyActionResult(operationContext(), dataSizeAction, resp); + + // 1. 
The outcome of the data size has been stored in the expected document... + auto chunkQuery = BSON(ChunkType::collectionUUID() + << kUuid << ChunkType::min(kKeyAtMin) << ChunkType::max(kKeyAtMax)); + auto configChunkDoc = + findOneOnConfigCollection(operationContext(), ChunkType::ConfigNS, chunkQuery).getValue(); + ASSERT_EQ(configChunkDoc.getField("estimatedDataSizeBytes").safeNumberLong(), + std::numeric_limits::max()); + + // No new action is expected - and the algorithm should converge + nextAction = _defragmentationPolicy.getNextStreamingAction(operationContext()); + ASSERT_TRUE(nextAction == boost::none); + ASSERT_FALSE(_defragmentationPolicy.isDefragmentingCollection(coll.getUuid())); + verifyExpectedDefragmentationPhaseOndisk(boost::none); +} + TEST_F(BalancerDefragmentationPolicyTest, TestRetriableFailedDataSizeActionGetsReissued) { auto coll = setupCollectionWithPhase({makeConfigChunkEntry()}); _defragmentationPolicy.startCollectionDefragmentation(operationContext(), coll); @@ -321,7 +348,7 @@ TEST_F(BalancerDefragmentationPolicyTest, TestRemoveCollectionEndsDefragmentatio auto nextAction = _defragmentationPolicy.getNextStreamingAction(operationContext()); DataSizeInfo dataSizeAction = stdx::get(*nextAction); - auto resp = StatusWith(DataSizeResponse(2000, 4)); + auto resp = StatusWith(DataSizeResponse(2000, 4, false)); _defragmentationPolicy.applyActionResult(operationContext(), dataSizeAction, resp); // Remove collection entry from config.collections diff --git a/src/mongo/db/s/balancer/balancer_policy.cpp b/src/mongo/db/s/balancer/balancer_policy.cpp index 0cf71995d04..ab3fd5660cc 100644 --- a/src/mongo/db/s/balancer/balancer_policy.cpp +++ b/src/mongo/db/s/balancer/balancer_policy.cpp @@ -1031,13 +1031,15 @@ DataSizeInfo::DataSizeInfo(const ShardId& shardId, const ChunkRange& chunkRange, const ChunkVersion& version, const KeyPattern& keyPattern, - bool estimatedValue) + bool estimatedValue, + int64_t maxSize) : shardId(shardId), nss(nss), uuid(uuid), 
chunkRange(chunkRange), version(version), keyPattern(keyPattern), - estimatedValue(estimatedValue) {} + estimatedValue(estimatedValue), + maxSize(maxSize) {} } // namespace mongo diff --git a/src/mongo/db/s/balancer/balancer_policy.h b/src/mongo/db/s/balancer/balancer_policy.h index 35f0c8ada30..4dbfdb797c3 100644 --- a/src/mongo/db/s/balancer/balancer_policy.h +++ b/src/mongo/db/s/balancer/balancer_policy.h @@ -186,7 +186,8 @@ struct DataSizeInfo { const ChunkRange& chunkRange, const ChunkVersion& version, const KeyPattern& keyPattern, - bool estimatedValue); + bool estimatedValue, + int64_t maxSize); ShardId shardId; NamespaceString nss; @@ -195,14 +196,16 @@ struct DataSizeInfo { ChunkVersion version; KeyPattern keyPattern; bool estimatedValue; + int64_t maxSize; }; struct DataSizeResponse { - DataSizeResponse(long long sizeBytes, long long numObjects) - : sizeBytes(sizeBytes), numObjects(numObjects) {} + DataSizeResponse(long long sizeBytes, long long numObjects, bool maxSizeReached) + : sizeBytes(sizeBytes), numObjects(numObjects), maxSizeReached(maxSizeReached) {} long long sizeBytes; long long numObjects; + bool maxSizeReached; }; typedef stdx:: -- cgit v1.2.1