From ed7703c26e871510a488d8e725dc4bf101cde5a2 Mon Sep 17 00:00:00 2001 From: Tommaso Tocci Date: Fri, 15 Jul 2022 18:44:22 +0000 Subject: SERVER-67733 ShardingTest::awaitBalancerRound() doesn't work in case of CSRS stepdowns --- .../no_balance_collection.js | 2 +- jstests/sharding/auto_rebalance_parallel.js | 4 +-- jstests/sharding/balancer_window.js | 4 +-- jstests/sharding/enforce_zone_policy.js | 2 +- jstests/sharding/libs/mongos_api_params_util.js | 2 +- jstests/sharding/merge_with_drop_shard.js | 2 -- src/mongo/db/s/balancer/balancer.cpp | 1 + src/mongo/shell/shardingtest.js | 35 ---------------------- src/mongo/shell/utils_sh.js | 26 +++++++++++----- 9 files changed, 26 insertions(+), 52 deletions(-) diff --git a/jstests/noPassthroughWithMongod/no_balance_collection.js b/jstests/noPassthroughWithMongod/no_balance_collection.js index 8b9d16d942b..ae976406c2a 100644 --- a/jstests/noPassthroughWithMongod/no_balance_collection.js +++ b/jstests/noPassthroughWithMongod/no_balance_collection.js @@ -76,7 +76,7 @@ sh.disableBalancing(collB); // Wait for the balancer to fully finish the last migration and write the changelog // MUST set db var here, ugly but necessary db = st.s0.getDB("config"); -st.waitForBalancer(true, 60000); +st.awaitBalancerRound(); // Make sure auto-migrates on insert don't move data var lastMigration = sh._lastMigration(collB); diff --git a/jstests/sharding/auto_rebalance_parallel.js b/jstests/sharding/auto_rebalance_parallel.js index cb424f0f920..4d63d7c3120 100644 --- a/jstests/sharding/auto_rebalance_parallel.js +++ b/jstests/sharding/auto_rebalance_parallel.js @@ -63,8 +63,8 @@ const testColl1InitialMoves = countMoves('TestDB.TestColl1'); const testColl2InitialMoves = countMoves('TestDB.TestColl2'); st.startBalancer(); -st.waitForBalancer(true, 60000); -st.waitForBalancer(true, 60000); +st.awaitBalancerRound(); +st.awaitBalancerRound(); st.stopBalancer(); checkCollectionBalanced('TestDB.TestColl1'); diff --git 
a/jstests/sharding/balancer_window.js b/jstests/sharding/balancer_window.js index 333cc30101e..0e93d967363 100644 --- a/jstests/sharding/balancer_window.js +++ b/jstests/sharding/balancer_window.js @@ -78,7 +78,7 @@ assert.commandWorked( true)); st.startBalancer(); -st.waitForBalancer(true, 60000); +st.awaitBalancerRound(); var shard0ChunksAfter = findChunksUtil.findChunksByNs(configDB, ns, {shard: st.shard0.shardName}).count(); @@ -93,7 +93,7 @@ assert.commandWorked(configDB.settings.update( }, true)); -st.waitForBalancer(true, 60000); +st.awaitBalancerRound(); shard0ChunksAfter = findChunksUtil.findChunksByNs(configDB, ns, {shard: st.shard0.shardName}).count(); diff --git a/jstests/sharding/enforce_zone_policy.js b/jstests/sharding/enforce_zone_policy.js index 1a35653fc00..f21c313bbd4 100644 --- a/jstests/sharding/enforce_zone_policy.js +++ b/jstests/sharding/enforce_zone_policy.js @@ -39,7 +39,7 @@ function assertBalanceCompleteAndStable(checkFunc, stepName) { assert.soon(checkFunc, 'Balance at step ' + stepName + ' did not happen', 3 * 60 * 1000, 2000); - st.waitForBalancer(true, 60000); + st.awaitBalancerRound(); st.printShardingStatus(true); assert(checkFunc()); diff --git a/jstests/sharding/libs/mongos_api_params_util.js b/jstests/sharding/libs/mongos_api_params_util.js index 6d0a4b2babc..158fe2e3ac0 100644 --- a/jstests/sharding/libs/mongos_api_params_util.js +++ b/jstests/sharding/libs/mongos_api_params_util.js @@ -75,7 +75,7 @@ let MongosAPIParametersUtil = (function() { function awaitRemoveShard(shardName) { assert.commandWorked(st.startBalancer()); - st.waitForBalancer(true, 60000); + st.awaitBalancerRound(); assert.soon(() => { const res = st.s.adminCommand({removeShard: shardName}); jsTestLog(`removeShard result: ${tojson(res)}`); diff --git a/jstests/sharding/merge_with_drop_shard.js b/jstests/sharding/merge_with_drop_shard.js index bc50b8fc058..bc0b18715ce 100644 --- a/jstests/sharding/merge_with_drop_shard.js +++ 
b/jstests/sharding/merge_with_drop_shard.js @@ -29,7 +29,6 @@ function setAggHang(mode) { function removeShard(shard) { // We need the balancer to drain all the chunks out of the shard that is being removed. assert.commandWorked(st.startBalancer()); - st.waitForBalancer(true, 60000); var res = st.s.adminCommand({removeShard: shard.shardName}); assert.commandWorked(res); assert.eq('started', res.state); @@ -45,7 +44,6 @@ function removeShard(shard) { st.configRS.awaitLastOpCommitted(); assert.commandWorked(st.s.adminCommand({flushRouterConfig: 1})); assert.commandWorked(st.stopBalancer()); - st.waitForBalancer(false, 60000); } function addShard(shard) { diff --git a/src/mongo/db/s/balancer/balancer.cpp b/src/mongo/db/s/balancer/balancer.cpp index f10a96cee4d..6822de95f8a 100644 --- a/src/mongo/db/s/balancer/balancer.cpp +++ b/src/mongo/db/s/balancer/balancer.cpp @@ -467,6 +467,7 @@ void Balancer::report(OperationContext* opCtx, BSONObjBuilder* builder) { builder->append("mode", BalancerSettingsType::kBalancerModes[mode]); builder->append("inBalancerRound", _inBalancerRound); builder->append("numBalancerRounds", _numBalancerRounds); + builder->append("term", repl::ReplicationCoordinator::get(opCtx)->getTerm()); } void Balancer::_consumeActionStreamLoop() { diff --git a/src/mongo/shell/shardingtest.js b/src/mongo/shell/shardingtest.js index b9465970890..1b5af0bbc66 100644 --- a/src/mongo/shell/shardingtest.js +++ b/src/mongo/shell/shardingtest.js @@ -739,41 +739,6 @@ var ShardingTest = function(params) { return max - min; }; - /** - * Waits up to the specified timeout (with a default of 60s) for the balancer to execute one - * round. If no round has been executed, throws an error. - * - * The mongosConnection parameter is optional and allows callers to specify a connection - * different than the first mongos instance in the list. 
-     */
-    this.awaitBalancerRound = function(timeoutMs, mongosConnection) {
-        timeoutMs = timeoutMs || 60000;
-        mongosConnection = mongosConnection || self.s0;
-
-        // Get the balancer section from the server status of the config server primary
-        function getBalancerStatus() {
-            var balancerStatus =
-                assert.commandWorked(mongosConnection.adminCommand({balancerStatus: 1}));
-            if (balancerStatus.mode !== 'full') {
-                throw Error('Balancer is not enabled');
-            }
-
-            return balancerStatus;
-        }
-
-        var initialStatus = getBalancerStatus();
-        var currentStatus;
-        assert.soon(
-            function() {
-                currentStatus = getBalancerStatus();
-                return (currentStatus.numBalancerRounds - initialStatus.numBalancerRounds) != 0;
-            },
-            function() {
-                return 'Latest balancer status: ' + tojson(currentStatus);
-            },
-            timeoutMs);
-    };
-
     /**
      * Waits up to one minute for the difference in chunks between the most loaded shard and
      * least loaded shard to be 0 or 1, indicating that the collection is well balanced. This should
diff --git a/src/mongo/shell/utils_sh.js b/src/mongo/shell/utils_sh.js
index b4bd7175096..3bb6db08d49 100644
--- a/src/mongo/shell/utils_sh.js
+++ b/src/mongo/shell/utils_sh.js
@@ -255,18 +255,38 @@ sh.waitForPingChange = function(activePings, timeout, interval) {
     return remainingPings;
 };
 
-sh.waitForBalancer = function(wait, timeout, interval) {
-    if (typeof (wait) === 'undefined') {
-        wait = false;
-    }
+/**
+ * Waits up to the specified timeout (with a default of 60s) for the balancer to execute one
+ * round. If no round has been executed, throws an error.
+ *
+ * The round counter is compared against a baseline that is reset whenever the 'term' reported
+ * in the balancer status changes, so a CSRS stepdown restarts the wait instead of failing it.
+ * Also throws if the reported balancer mode is not 'full'.
+ *
+ * @param timeout maximum wait in milliseconds, forwarded to assert.soon (defaults to 60000)
+ * @param interval polling interval forwarded to assert.soon (optional)
+ */
+sh.awaitBalancerRound = function(timeout, interval) {
+    timeout = timeout || 60000;
+
     var initialStatus = sh._getBalancerStatus();
-    if (!initialStatus.inBalancerRound && !wait) {
-        return;
-    }
     var currentStatus;
     assert.soon(function() {
         currentStatus = sh._getBalancerStatus();
-        return (currentStatus.numBalancerRounds - initialStatus.numBalancerRounds) != 0;
+        assert.eq(currentStatus.mode, 'full', "Balancer is disabled");
+        if (!friendlyEqual(currentStatus.term, initialStatus.term)) {
+            // A new primary of the csrs has been elected
+            initialStatus = currentStatus;
+            return false;
+        }
+        assert.gte(currentStatus.numBalancerRounds,
+                   initialStatus.numBalancerRounds,
+                   'Number of balancer rounds moved back in time unexpectedly. Current status: ' +
+                       tojson(currentStatus) + ', initial status: ' + tojson(initialStatus));
+        return currentStatus.numBalancerRounds > initialStatus.numBalancerRounds;
-    }, 'Latest balancer status: ' + tojson(currentStatus), timeout, interval);
+    }, function() {
+        // Built lazily: 'currentStatus' is still unassigned when assert.soon() is invoked
+        return 'Latest balancer status: ' + tojson(currentStatus);
+    }, timeout, interval);
 };
 
-- 
cgit v1.2.1