diff options
author | Tommaso Tocci <tommaso.tocci@mongodb.com> | 2022-02-08 18:00:34 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-02-08 19:13:48 +0000 |
commit | 0bc10b4008eff18c1df261d48954386d49414365 (patch) | |
tree | 5d238328332260431e1196b116fe16a1f61dc157 | |
parent | 048b9683575473b335ffce45093cc4ab7f03616b (diff) | |
download | mongo-0bc10b4008eff18c1df261d48954386d49414365.tar.gz |
SERVER-61635 Validate that the defragmentation algorithm is fairly executed across collections
7 files changed, 152 insertions, 89 deletions
diff --git a/jstests/sharding/balancer_defragmentation_merge_chunks.js b/jstests/sharding/balancer_defragmentation_merge_chunks.js index 40ed6e3b9db..a3a3131329c 100644 --- a/jstests/sharding/balancer_defragmentation_merge_chunks.js +++ b/jstests/sharding/balancer_defragmentation_merge_chunks.js @@ -56,7 +56,7 @@ function setupCollection() { const coll = getNewColl(); assert.commandWorked(st.s.adminCommand({shardCollection: coll.getFullName(), key: {key: 1}})); defragmentationUtil.createFragmentedCollection(st.s, - coll, + coll.getFullName(), 10 /* numChunks */, targetChunkSizeMB / 2 /* maxChunkFillMB */, 0 /* numZones */, diff --git a/jstests/sharding/defragment_large_collection.js b/jstests/sharding/defragment_large_collection.js index f86019e4997..e5d6ecb72b9 100644 --- a/jstests/sharding/defragment_large_collection.js +++ b/jstests/sharding/defragment_large_collection.js @@ -17,11 +17,9 @@ Random.setRandomSeed(); // Test parameters const numShards = Random.randInt(7) + 1; -const numChunks = Random.randInt(28) + 2; -const numZones = Random.randInt(numChunks / 2); +const numCollections = 3; const maxChunkFillMB = 20; const maxChunkSizeMB = 30; -const docSizeBytes = Random.randInt(1024 * 1024) + 50; const chunkSpacing = 1000; const st = new ShardingTest({ @@ -36,28 +34,50 @@ const st = new ShardingTest({ // setup the database for the test assert.commandWorked(st.s.adminCommand({enableSharding: 'db'})); const db = st.getDB('db'); -const coll = db["testColl"]; -const ns = coll.getFullName(); -assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {key: 1}})); +const coll_prefix = "testColl_"; -defragmentationUtil.createFragmentedCollection( - st.s, coll, numChunks, maxChunkFillMB, numZones, docSizeBytes, chunkSpacing); +let collections = []; + +for (let i = 0; i < numCollections; ++i) { + const numChunks = Random.randInt(28) + 2; + const numZones = Random.randInt(numChunks / 2); + const docSizeBytes = Random.randInt(1024 * 1024) + 50; + + const coll = db[coll_prefix + i]; + + defragmentationUtil.createFragmentedCollection( + st.s, coll.getFullName(), numChunks, maxChunkFillMB, numZones, docSizeBytes, chunkSpacing); + + const beginningNumberChunks = + findChunksUtil.countChunksForNs(st.s.getDB('config'), coll.getFullName()); + jsTest.log("Create collection " + coll + " with " + beginningNumberChunks + " chunks."); + + collections.push(coll); +} -let beginningNumberChunks = findChunksUtil.countChunksForNs(st.s.getDB('config'), ns); -jsTest.log("Beginning defragmentation of collection with " + beginningNumberChunks + " chunks."); st.printShardingStatus(); -assert.commandWorked(st.s.adminCommand({ - configureCollectionBalancing: ns, - defragmentCollection: true, - chunkSize: maxChunkSizeMB, -})); + +collections.forEach((coll) => { + assert.commandWorked(st.s.adminCommand({ + configureCollectionBalancing: coll.getFullName(), + defragmentCollection: true, + chunkSize: maxChunkSizeMB, + })); +}); + st.startBalancer(); -// Wait for defragmentation to end and check collection final state -defragmentationUtil.waitForEndOfDefragmentation(st.s, ns); -let finalNumberChunks = findChunksUtil.countChunksForNs(st.s.getDB('config'), ns); -jsTest.log("Finished defragmentation of collection with " + finalNumberChunks + " chunks."); -defragmentationUtil.checkPostDefragmentationState(st.s, coll, maxChunkSizeMB, "key"); +collections.forEach((coll) => { + const ns = coll.getFullName(); + + // Wait for defragmentation to end and check collection final state + defragmentationUtil.waitForEndOfDefragmentation(st.s, ns); + const finalNumberChunks = findChunksUtil.countChunksForNs(st.s.getDB('config'), ns); + jsTest.log("Finished defragmentation of collection " + coll + " with " + finalNumberChunks + + " chunks."); + defragmentationUtil.checkPostDefragmentationState(st.s, ns, maxChunkSizeMB, "key"); +}); + st.printShardingStatus(); st.stop(); diff --git a/jstests/sharding/libs/defragmentation_util.js b/jstests/sharding/libs/defragmentation_util.js index a6ddb7c7fdf..37b6a2c786d 100644 --- a/jstests/sharding/libs/defragmentation_util.js +++ b/jstests/sharding/libs/defragmentation_util.js @@ -4,41 +4,38 @@ var defragmentationUtil = (function() { // This function creates a randomized, fragmented collection. It does not necessarily make a // collection with exactly numChunks chunks nor exactly numZones zones. let createFragmentedCollection = function( - mongos, testColl, numChunks, maxChunkFillMB, numZones, docSizeBytes, chunkSpacing) { - createAndDistributeChunks(mongos, testColl, numChunks, chunkSpacing); - createRandomZones(mongos, testColl, numZones, chunkSpacing); - fillChunksToRandomSize(testColl, docSizeBytes, maxChunkFillMB); + mongos, ns, numChunks, maxChunkFillMB, numZones, docSizeBytes, chunkSpacing) { + assert.commandWorked(mongos.adminCommand({shardCollection: ns, key: {key: 1}})); + + createAndDistributeChunks(mongos, ns, numChunks, chunkSpacing); + createRandomZones(mongos, ns, numZones, chunkSpacing); + fillChunksToRandomSize(mongos, ns, docSizeBytes, maxChunkFillMB); }; - let createAndDistributeChunks = function(mongos, testColl, numChunks, chunkSpacing) { + let createAndDistributeChunks = function(mongos, ns, numChunks, chunkSpacing) { const shards = mongos.getCollection('config.shards').find().toArray(); for (let i = -Math.floor(numChunks / 2); i <= Math.floor(numChunks / 2); i++) { - assert.commandWorked(testColl.getDB().adminCommand( - {split: testColl.getFullName(), middle: {key: i * chunkSpacing}})); + assert.commandWorked(mongos.adminCommand({split: ns, middle: {key: i * chunkSpacing}})); assert.soon(() => { let toShard = Random.randInt(shards.length); - let res = testColl.getDB().adminCommand({ - moveChunk: testColl.getFullName(), - find: {key: i * chunkSpacing}, - to: shards[toShard]._id - }); + let res = mongos.adminCommand( + {moveChunk: ns, find: {key: i * chunkSpacing}, to: shards[toShard]._id}); return res.ok; }); } }; - let createRandomZones = function(mongos, testColl, numZones, chunkSpacing) { + let createRandomZones = function(mongos, ns, numZones, chunkSpacing) { for (let i = -Math.floor(numZones / 2); i < Math.ceil(numZones / 2); i++) { let zoneName = "Zone" + i; - let shardForZone = findChunksUtil - .findOneChunkByNs(mongos.getDB('config'), - testColl.getFullName(), - {min: {key: i * chunkSpacing}}) - .shard; + let shardForZone = + findChunksUtil + .findOneChunkByNs(mongos.getDB('config'), ns, {min: {key: i * chunkSpacing}}) + .shard; assert.commandWorked( mongos.adminCommand({addShardToZone: shardForZone, zone: zoneName})); assert.commandWorked(mongos.adminCommand({ - updateZoneKeyRange: testColl.getFullName(), + updateZoneKeyRange: ns, min: {key: i * chunkSpacing}, max: {key: i * chunkSpacing + chunkSpacing}, zone: zoneName @@ -46,13 +43,11 @@ var defragmentationUtil = (function() { } }; - let fillChunksToRandomSize = function(testColl, docSizeBytes, maxChunkFillMB) { - const chunks = - findChunksUtil - .findChunksByNs(testColl.getDB().getSiblingDB('config'), testColl.getFullName()) - .toArray(); + let fillChunksToRandomSize = function(mongos, ns, docSizeBytes, maxChunkFillMB) { + const chunks = findChunksUtil.findChunksByNs(mongos.getDB('config'), ns).toArray(); const bigString = "X".repeat(docSizeBytes); - let bulk = testColl.initializeUnorderedBulkOp(); + const coll = mongos.getCollection(ns); + let bulk = coll.initializeUnorderedBulkOp(); chunks.forEach((chunk) => { let chunkSize = Random.randInt(maxChunkFillMB); let docsPerChunk = (chunkSize * 1024 * 1024) / docSizeBytes; @@ -75,17 +70,17 @@ var defragmentationUtil = (function() { assert.commandWorked(bulk.execute()); }; - let checkPostDefragmentationState = function(mongos, testColl, maxChunkSizeMB, shardKey) { + let checkPostDefragmentationState = function(mongos, ns, maxChunkSizeMB, shardKey) { const oversizedChunkThreshold = maxChunkSizeMB * 1024 * 1024 * 4 / 3; - const ns = testColl.getFullName(); const chunks = findChunksUtil.findChunksByNs(mongos.getDB('config'), ns).sort({shardKey: 1}).toArray(); - const collStats = assert.commandWorked(testColl.getDB().runCommand({collStats: ns})); + const coll = mongos.getCollection(ns); + const collStats = assert.commandWorked(coll.getDB().runCommand({collStats: ns})); const avgObjSize = collStats.avgObjSize; let checkForOversizedChunk = function( - testColl, chunk, shardKey, avgObjSize, oversizedChunkThreshold) { - let chunkSize = testColl.countDocuments( - {key: {$gte: chunk.min[shardKey], $lt: chunk.max[shardKey]}}) * + coll, chunk, shardKey, avgObjSize, oversizedChunkThreshold) { + let chunkSize = + coll.countDocuments({key: {$gte: chunk.min[shardKey], $lt: chunk.max[shardKey]}}) * avgObjSize; assert.lte( chunkSize, @@ -101,7 +96,7 @@ var defragmentationUtil = (function() { let chunk1Zone = getZoneForRange(mongos, ns, chunk1.min, chunk1.max); let chunk2Zone = getZoneForRange(mongos, ns, chunk2.min, chunk2.max); if (chunk1Zone === chunk2Zone) { - let combinedDataSize = testColl.countDocuments({ + let combinedDataSize = coll.countDocuments({ shardKey: {$gte: chunk1.min[shardKey], $lt: chunk2.max[shardKey]} }) * avgObjSize; assert.lte( @@ -112,10 +107,10 @@ var defragmentationUtil = (function() { } } // Check for oversized chunks - checkForOversizedChunk(testColl, chunk1, shardKey, avgObjSize, oversizedChunkThreshold); + checkForOversizedChunk(coll, chunk1, shardKey, avgObjSize, oversizedChunkThreshold); } const lastChunk = chunks[chunks.length - 1]; - checkForOversizedChunk(testColl, lastChunk, shardKey, avgObjSize, oversizedChunkThreshold); + checkForOversizedChunk(coll, lastChunk, shardKey, avgObjSize, oversizedChunkThreshold); }; let getZoneForRange = function(mongos, ns, minKey, maxKey) { diff --git a/src/mongo/db/s/balancer/balancer.cpp b/src/mongo/db/s/balancer/balancer.cpp index 59e490fb106..b180654ffb4 100644 --- a/src/mongo/db/s/balancer/balancer.cpp +++ b/src/mongo/db/s/balancer/balancer.cpp @@ -223,7 +223,7 @@ Balancer::Balancer() std::make_unique<BalancerChunkSelectionPolicyImpl>(_clusterStats.get(), _random)), _commandScheduler(std::make_unique<BalancerCommandsSchedulerImpl>()), _defragmentationPolicy( - std::make_unique<BalancerDefragmentationPolicyImpl>(_clusterStats.get())) {} + std::make_unique<BalancerDefragmentationPolicyImpl>(_clusterStats.get(), _random)) {} Balancer::~Balancer() { // Terminate the balancer thread so it doesn't leak memory. diff --git a/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.cpp b/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.cpp index 263aa61fe2f..b64c88acd4c 100644 --- a/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.cpp +++ b/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.cpp @@ -1440,37 +1440,62 @@ MigrateInfoVector BalancerDefragmentationPolicyImpl::selectChunksToMove( OperationContext* opCtx, stdx::unordered_set<ShardId>* usedShards) { MigrateInfoVector chunksToMove; stdx::lock_guard<Latch> lk(_stateMutex); - // TODO (SERVER-61635) evaluate fairness - bool done = false; - while (!done) { - auto selectedChunksFromPreviousRound = chunksToMove.size(); - for (auto it = _defragmentationStates.begin(); it != _defragmentationStates.end();) { + + std::vector<UUID> collectionUUIDs; + collectionUUIDs.reserve(_defragmentationStates.size()); + for (const auto& defragState : _defragmentationStates) { + collectionUUIDs.push_back(defragState.first); + } + std::shuffle(collectionUUIDs.begin(), collectionUUIDs.end(), _random); + + auto popCollectionUUID = [&](std::vector<UUID>::iterator elemIt) { + if (std::next(elemIt) != collectionUUIDs.end()) { + *elemIt = std::move(collectionUUIDs.back()); + } + collectionUUIDs.pop_back(); + }; + + while (!collectionUUIDs.empty()) { + for (auto it = collectionUUIDs.begin(); it != collectionUUIDs.end();) { + + const auto& collUUID = *it; + try { - const auto phaseAdvanced = _refreshDefragmentationPhaseFor(opCtx, it->first); - auto& collDefragmentationPhase = it->second; + auto defragStateIt = _defragmentationStates.find(collUUID); + if (defragStateIt == _defragmentationStates.end()) { + popCollectionUUID(it); + continue; + }; + + const auto phaseAdvanced = _refreshDefragmentationPhaseFor(opCtx, collUUID); + auto& collDefragmentationPhase = defragStateIt->second; if (!collDefragmentationPhase) { - it = _defragmentationStates.erase(it, std::next(it)); + _defragmentationStates.erase(defragStateIt); + popCollectionUUID(it); continue; } auto actionableMigration = collDefragmentationPhase->popNextMigration(opCtx, usedShards); - if (actionableMigration.has_value()) { - chunksToMove.push_back(std::move(*actionableMigration)); - } else if (phaseAdvanced) { - _yieldNextStreamingAction(lk, opCtx); + if (!actionableMigration.has_value()) { + if (phaseAdvanced) { + _yieldNextStreamingAction(lk, opCtx); + } + popCollectionUUID(it); + continue; } + chunksToMove.push_back(std::move(*actionableMigration)); ++it; } catch (DBException& e) { // Catch getCollection and getShardVersion errors. Should only occur if collection // has been removed. LOGV2_ERROR(6172700, "Error while getting next migration", - "uuid"_attr = it->first, + "uuid"_attr = collUUID, "error"_attr = redact(e)); - it = _defragmentationStates.erase(it, std::next(it)); + _defragmentationStates.erase(collUUID); + popCollectionUUID(it); } } - done = (chunksToMove.size() == selectedChunksFromPreviousRound); } return chunksToMove; } @@ -1511,29 +1536,45 @@ bool BalancerDefragmentationPolicyImpl::_refreshDefragmentationPhaseFor(Operatio boost::optional<DefragmentationAction> BalancerDefragmentationPolicyImpl::_nextStreamingAction( OperationContext* opCtx) { - // TODO (SERVER-61635) validate fairness through collections - for (auto it = _defragmentationStates.begin(); it != _defragmentationStates.end();) { + + // Visit the defrag state in round robin fashion starting from a random one + + auto stateIt = [&] { + auto it = _defragmentationStates.begin(); + if (_defragmentationStates.size() > 1) { + std::uniform_int_distribution<size_t> uniDist{0, _defragmentationStates.size() - 1}; + std::advance(it, uniDist(_random)); + } + return it; + }(); + + for (auto stateToVisit = _defragmentationStates.size(); stateToVisit != 0; --stateToVisit) { try { - _refreshDefragmentationPhaseFor(opCtx, it->first); - auto& currentCollectionDefragmentationState = it->second; - if (!currentCollectionDefragmentationState) { - it = _defragmentationStates.erase(it, std::next(it)); - continue; - } - // Get next action - auto nextAction = currentCollectionDefragmentationState->popNextStreamableAction(opCtx); - if (nextAction) { - return nextAction; + _refreshDefragmentationPhaseFor(opCtx, stateIt->first); + auto& currentCollectionDefragmentationState = stateIt->second; + if (currentCollectionDefragmentationState) { + // Get next action + auto nextAction = + currentCollectionDefragmentationState->popNextStreamableAction(opCtx); + if (nextAction) { + return nextAction; + } + ++stateIt; + } else { + stateIt = _defragmentationStates.erase(stateIt, std::next(stateIt)); } - ++it; } catch (DBException& e) { // Catch getCollection and getShardVersion errors. Should only occur if collection has // been removed. LOGV2_ERROR(6153301, "Error while getting next defragmentation action", - "uuid"_attr = it->first, + "uuid"_attr = stateIt->first, "error"_attr = redact(e)); - it = _defragmentationStates.erase(it, std::next(it)); + stateIt = _defragmentationStates.erase(stateIt, std::next(stateIt)); + } + + if (stateIt == _defragmentationStates.end()) { + stateIt = _defragmentationStates.begin(); } } diff --git a/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.h b/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.h index 880bde89573..ab53ab260f3 100644 --- a/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.h +++ b/src/mongo/db/s/balancer/balancer_defragmentation_policy_impl.h @@ -31,6 +31,7 @@ #include "mongo/db/s/balancer/balancer_defragmentation_policy.h" #include "mongo/db/s/balancer/balancer_policy.h" +#include "mongo/db/s/balancer/balancer_random.h" #include "mongo/s/catalog/type_collection.h" namespace mongo { @@ -70,8 +71,8 @@ class BalancerDefragmentationPolicyImpl : public BalancerDefragmentationPolicy { BalancerDefragmentationPolicyImpl& operator=(const BalancerDefragmentationPolicyImpl&) = delete; public: - BalancerDefragmentationPolicyImpl(ClusterStatistics* clusterStats) - : _clusterStats(clusterStats) {} + BalancerDefragmentationPolicyImpl(ClusterStatistics* clusterStats, BalancerRandomSource& random) + : _clusterStats(clusterStats), _random(random) {} ~BalancerDefragmentationPolicyImpl() {} @@ -180,6 +181,8 @@ private: ClusterStatistics* const _clusterStats; + BalancerRandomSource& _random; + stdx::unordered_map<UUID, std::unique_ptr<DefragmentationPhase>, UUID::Hash> _defragmentationStates; }; diff --git a/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp b/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp index 46e48a4aca3..845c9b2893c 100644 --- a/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp +++ b/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp @@ -71,7 +71,10 @@ protected: ShardType(kShardId2.toString(), kShardHost2.toString()), ShardType(kShardId3.toString(), kShardHost3.toString())}; - BalancerDefragmentationPolicyTest() : _clusterStats(), _defragmentationPolicy(&_clusterStats) {} + BalancerDefragmentationPolicyTest() + : _clusterStats(), + _random(std::random_device{}()), + _defragmentationPolicy(&_clusterStats, _random) {} CollectionType setupCollectionWithPhase( const std::vector<ChunkType>& chunkList, @@ -131,6 +134,7 @@ protected: } ClusterStatisticsMock _clusterStats; + BalancerRandomSource _random; BalancerDefragmentationPolicyImpl _defragmentationPolicy; ShardStatistics buildShardStats(ShardId id, |