diff options
author | Cheahuychou Mao <mao.cheahuychou@gmail.com> | 2020-12-18 20:19:13 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-12-21 17:18:20 +0000 |
commit | fc8aa79ddb1e4910ad227dddbe0ab3603e663fa8 (patch) | |
tree | 58ab6c0c891d08ff074e05dc29494419e9cbf89e | |
parent | 3417215850d7a577452552499ea55d1872199d74 (diff) | |
download | mongo-fc8aa79ddb1e4910ad227dddbe0ab3603e663fa8.tar.gz |
SERVER-53444 Make tests that run removeShard in assert.soon (to wait for the state to become "completed") not error on ShardNotFound
(cherry picked from commit 03637b5614c1a29983cdac9a1f9ab2d3f7060f15)
-rw-r--r-- | jstests/sharding/balancing_sessions_collection.js | 9 | ||||
-rw-r--r-- | jstests/sharding/listshards.js | 9 | ||||
-rw-r--r-- | jstests/sharding/shard_removal_triggers_catalog_cache_invalidation.js | 18 |
3 files changed, 36 insertions, 0 deletions
diff --git a/jstests/sharding/balancing_sessions_collection.js b/jstests/sharding/balancing_sessions_collection.js index 416eaa6186d..8313b1ca80d 100644 --- a/jstests/sharding/balancing_sessions_collection.js +++ b/jstests/sharding/balancing_sessions_collection.js @@ -51,6 +51,15 @@ function removeShardFromCluster(shardName) { assert.commandWorked(st.s.adminCommand({removeShard: shardName})); assert.soon(function() { const res = st.s.adminCommand({removeShard: shardName}); + if (!res.ok && res.code === ErrorCodes.ShardNotFound) { + // If the config server primary steps down right after removing the config.shards doc + // for the shard but before responding with "state": "completed", the mongos would retry + // the _configsvrRemoveShard command against the new config server primary, which would + // not find the removed shard in its ShardRegistry if it has done a ShardRegistry reload + // after the config.shards doc for the shard was removed. This would cause the command + // to fail with ShardNotFound. 
+ return true; + } assert.commandWorked(res); return ("completed" == res.state); }, "failed to remove shard " + shardName, kBalancerTimeoutMS); diff --git a/jstests/sharding/listshards.js b/jstests/sharding/listshards.js index e008ffb6689..0960746fe7c 100644 --- a/jstests/sharding/listshards.js +++ b/jstests/sharding/listshards.js @@ -51,6 +51,15 @@ assert(checkShardName('repl', shardsArray), // remove 'repl' shard assert.soon(function() { var res = shardTest.admin.runCommand({removeShard: 'repl'}); + if (!res.ok && res.code === ErrorCodes.ShardNotFound) { + // If the config server primary steps down right after removing the config.shards doc + // for the shard but before responding with "state": "completed", the mongos would retry + // the _configsvrRemoveShard command against the new config server primary, which would + // not find the removed shard in its ShardRegistry if it has done a ShardRegistry reload + // after the config.shards doc for the shard was removed. This would cause the command + // to fail with ShardNotFound. + return true; + } assert.commandWorked(res, 'removeShard command failed'); return res.state === 'completed'; }, 'failed to remove the replica set shard'); diff --git a/jstests/sharding/shard_removal_triggers_catalog_cache_invalidation.js b/jstests/sharding/shard_removal_triggers_catalog_cache_invalidation.js index 5eaeba8cf0a..aa51130c007 100644 --- a/jstests/sharding/shard_removal_triggers_catalog_cache_invalidation.js +++ b/jstests/sharding/shard_removal_triggers_catalog_cache_invalidation.js @@ -57,12 +57,30 @@ st.startBalancer(); // Remove shard1. 
assert.soon(() => { const removeRes = assert.commandWorked(st.s0.adminCommand({removeShard: st.shard1.shardName})); + if (!removeRes.ok && removeRes.code === ErrorCodes.ShardNotFound) { + // If the config server primary steps down right after removing the config.shards doc + // for the shard but before responding with "state": "completed", the mongos would retry + // the _configsvrRemoveShard command against the new config server primary, which would + // not find the removed shard in its ShardRegistry if it has done a ShardRegistry reload + // after the config.shards doc for the shard was removed. This would cause the command + // to fail with ShardNotFound. + return true; + } return 'completed' === removeRes.state; }); // Remove shard0. assert.soon(() => { const removeRes = assert.commandWorked(st.s0.adminCommand({removeShard: st.shard0.shardName})); + if (!removeRes.ok && removeRes.code === ErrorCodes.ShardNotFound) { + // If the config server primary steps down right after removing the config.shards doc + // for the shard but before responding with "state": "completed", the mongos would retry + // the _configsvrRemoveShard command against the new config server primary, which would + // not find the removed shard in its ShardRegistry if it has done a ShardRegistry reload + // after the config.shards doc for the shard was removed. This would cause the command + // to fail with ShardNotFound. + return true; + } return 'completed' === removeRes.state; }); |