summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Cheahuychou Mao <mao.cheahuychou@gmail.com> 2020-12-18 20:19:13 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-12-21 17:18:20 +0000
commit fc8aa79ddb1e4910ad227dddbe0ab3603e663fa8 (patch)
tree 58ab6c0c891d08ff074e05dc29494419e9cbf89e
parent 3417215850d7a577452552499ea55d1872199d74 (diff)
download mongo-fc8aa79ddb1e4910ad227dddbe0ab3603e663fa8.tar.gz
SERVER-53444 Make tests that run removeShard in assert.soon to wait for the state to become completed not error on ShardNotFound
(cherry picked from commit 03637b5614c1a29983cdac9a1f9ab2d3f7060f15)
-rw-r--r-- jstests/sharding/balancing_sessions_collection.js 9
-rw-r--r-- jstests/sharding/listshards.js 9
-rw-r--r-- jstests/sharding/shard_removal_triggers_catalog_cache_invalidation.js 18
3 files changed, 36 insertions, 0 deletions
diff --git a/jstests/sharding/balancing_sessions_collection.js b/jstests/sharding/balancing_sessions_collection.js
index 416eaa6186d..8313b1ca80d 100644
--- a/jstests/sharding/balancing_sessions_collection.js
+++ b/jstests/sharding/balancing_sessions_collection.js
@@ -51,6 +51,15 @@ function removeShardFromCluster(shardName) {
assert.commandWorked(st.s.adminCommand({removeShard: shardName}));
assert.soon(function() {
const res = st.s.adminCommand({removeShard: shardName});
+ if (!res.ok && res.code === ErrorCodes.ShardNotFound) {
+ // If the config server primary steps down right after removing the config.shards doc
+ // for the shard but before responding with "state": "completed", the mongos would retry
+ // the _configsvrRemoveShard command against the new config server primary, which would
+ // not find the removed shard in its ShardRegistry if it has done a ShardRegistry reload
+ // after the config.shards doc for the shard was removed. This would cause the command
+ // to fail with ShardNotFound.
+ return true;  // Treat ShardNotFound as the removal having completed.
+ }
assert.commandWorked(res);
return ("completed" == res.state);
}, "failed to remove shard " + shardName, kBalancerTimeoutMS);
diff --git a/jstests/sharding/listshards.js b/jstests/sharding/listshards.js
index e008ffb6689..0960746fe7c 100644
--- a/jstests/sharding/listshards.js
+++ b/jstests/sharding/listshards.js
@@ -51,6 +51,15 @@ assert(checkShardName('repl', shardsArray),
// remove 'repl' shard
assert.soon(function() {
var res = shardTest.admin.runCommand({removeShard: 'repl'});
+ if (!res.ok && res.code === ErrorCodes.ShardNotFound) {
+ // If the config server primary steps down right after removing the config.shards doc
+ // for the shard but before responding with "state": "completed", the mongos would retry
+ // the _configsvrRemoveShard command against the new config server primary, which would
+ // not find the removed shard in its ShardRegistry if it has done a ShardRegistry reload
+ // after the config.shards doc for the shard was removed. This would cause the command
+ // to fail with ShardNotFound.
+ return true;  // Treat ShardNotFound as the removal having completed.
+ }
assert.commandWorked(res, 'removeShard command failed');
return res.state === 'completed';
}, 'failed to remove the replica set shard');
diff --git a/jstests/sharding/shard_removal_triggers_catalog_cache_invalidation.js b/jstests/sharding/shard_removal_triggers_catalog_cache_invalidation.js
index 5eaeba8cf0a..aa51130c007 100644
--- a/jstests/sharding/shard_removal_triggers_catalog_cache_invalidation.js
+++ b/jstests/sharding/shard_removal_triggers_catalog_cache_invalidation.js
@@ -57,12 +57,30 @@ st.startBalancer();
// Remove shard1.
assert.soon(() => {
const removeRes = assert.commandWorked(st.s0.adminCommand({removeShard: st.shard1.shardName}));
+ if (!removeRes.ok && removeRes.code === ErrorCodes.ShardNotFound) {
+ // If the config server primary steps down right after removing the config.shards doc
+ // for the shard but before responding with "state": "completed", the mongos would retry
+ // the _configsvrRemoveShard command against the new config server primary, which would
+ // not find the removed shard in its ShardRegistry if it has done a ShardRegistry reload
+ // after the config.shards doc for the shard was removed. This would cause the command
+ // to fail with ShardNotFound.
+ return true;
+ }
return 'completed' === removeRes.state;
});
// Remove shard0.
assert.soon(() => {
const removeRes = assert.commandWorked(st.s0.adminCommand({removeShard: st.shard0.shardName}));
+ if (!removeRes.ok && removeRes.code === ErrorCodes.ShardNotFound) {
+ // If the config server primary steps down right after removing the config.shards doc
+ // for the shard but before responding with "state": "completed", the mongos would retry
+ // the _configsvrRemoveShard command against the new config server primary, which would
+ // not find the removed shard in its ShardRegistry if it has done a ShardRegistry reload
+ // after the config.shards doc for the shard was removed. This would cause the command
+ // to fail with ShardNotFound.
+ return true;
+ }
return 'completed' === removeRes.state;
});