SERVER-41758 Verify that at least 1 shard exists after hard-reload in aggregation routing path

(cherry picked from commit 1fa4766c621bd4cfd74319094469eff3a5de3b79)
author: Bernard Gorman <bernard.gorman@mongodb.com> 2019-09-13 10:12:39 +0000
committer: evergreen <evergreen@mongodb.com> 2019-09-13 10:12:39 +0000
commit: 89d5c8b10e40648a403f12a55bcb66f2f5bef384 (patch)
tree: 3674111f238de4c3bbd7b0ab717cac8bccbb864d /jstests
parent: 11eaee8aace14a3b8ba1d7c3ab462e8badc8ffee (diff)
download: mongo-89d5c8b10e40648a403f12a55bcb66f2f5bef384.tar.gz
2 files changed, 138 insertions, 0 deletions
diff --git a/jstests/concurrency/fsm_workloads_add_remove_shards/clusterwide_ops_with_add_drop_shards.js b/jstests/concurrency/fsm_workloads_add_remove_shards/clusterwide_ops_with_add_drop_shards.js
new file mode 100644
index 00000000000..54e6ce913bf
--- /dev/null
+++ b/jstests/concurrency/fsm_workloads_add_remove_shards/clusterwide_ops_with_add_drop_shards.js
@@ -0,0 +1,28 @@
+/**
+ * Verify that operations which must run on all shards, such as $currentOp and $changeStream, do not
+ * crash when shards are added to the cluster mid-operation, or when config.shards is dropped.
+ *
+ * This test inherits from 'clusterwide_ops_with_add_remove_shards.js' but is kept separate
+ * from it, because (1) we may remove the ability to write to config.shards in the future, at which
+ * point this test can simply be removed; and (2) running a single FSM test with both removeShard
+ * and config.shards.remove({}) can cause the former to hang indefinitely while waiting for the
+ * removed shard to drain.
+ *
+ * @tags: [requires_sharding, requires_non_retryable_writes, catches_command_failures,
+ * uses_change_streams, uses_curop_agg_stage]
+ */
+
+"use strict";
+
+// Load the base workload, which defines the '$config' object that is modified below.
+const baseDir = 'jstests/concurrency/fsm_workloads_add_remove_shards/';
+load(baseDir + 'clusterwide_ops_with_add_remove_shards.js');
+
+// After loading the base file, $config has been populated with states and transitions. We simply
+// overwrite 'states.removeShard' so that it deletes every document in config.shards at once rather
+// than removing a single shard via the removeShard command. This is the only way to test that
+// mongoS is resilient to the sudden absence of shards in the middle of an operation, as the
+// removeShard command is not permitted to remove the last existing shard in the cluster.
+$config.states.removeShard = function(db, collName) {
+    assert.commandWorked(db.getSiblingDB("config").shards.remove({}));
+};
diff --git a/jstests/concurrency/fsm_workloads_add_remove_shards/clusterwide_ops_with_add_remove_shards.js b/jstests/concurrency/fsm_workloads_add_remove_shards/clusterwide_ops_with_add_remove_shards.js
new file mode 100644
index 00000000000..b16ddcd319b
--- /dev/null
+++ b/jstests/concurrency/fsm_workloads_add_remove_shards/clusterwide_ops_with_add_remove_shards.js
@@ -0,0 +1,110 @@
+/**
+ * Verify that operations which must run on all shards, such as $currentOp and $changeStream, do not
+ * crash when shards are added to or removed from the cluster mid-operation.
+ *
+ * @tags: [requires_sharding, requires_non_retryable_writes, catches_command_failures,
+ * uses_change_streams, uses_curop_agg_stage]
+ */
+
+"use strict";
+
+var $config = (function() {
+    // The 'setup' function is run once by the parent thread after the cluster has been initialized,
+    // before the worker threads have been spawned. The 'this' argument is bound as '$config.data'.
+    function setup(db, collName, cluster) {  // 'collName' and 'cluster' are not used here.
+        // Snapshot all config.shards documents; used later to remove shards and add them back.
+        this.shardList = db.getSiblingDB("config").shards.find().toArray();
+        // Drop the test database. It's not needed and will complicate re-adding shards.
+        assert.commandWorked(db.dropDatabase());
+    }
+
+    // Returns a random integer in the range [0, floor(max)); 'max' is floored before scaling.
+    function randomInt(max) {
+        return Math.floor(Math.random() * Math.floor(max));
+    }
+
+    // Helper to kill the cursor returned in command result 'res'; does nothing if 'res' is not ok.
+    function closeClusterWideCursor(db, res) {
+        if (res.ok) {
+            db.adminCommand({  // The killCursors response is not checked.
+                killCursors: "$cmd.aggregate",
+                cursors: [res.cursor.id],
+            });
+        }
+    }
+
+    var states = {
+        runChangeStream: function(db, collName) {  // Open a whole-cluster change stream, then close it.
+            const res = db.adminCommand({
+                aggregate: 1,
+                pipeline: [{$changeStream: {allChangesForCluster: true}}],
+                cursor: {}
+            });
+            closeClusterWideCursor(db, res);  // 'res' is not asserted on; a failed command is skipped.
+        },
+
+        runCurrentOp: function(db, collName) {  // Run $currentOp as an aggregation, then close it.
+            const res = db.adminCommand({aggregate: 1, pipeline: [{$currentOp: {}}], cursor: {}});
+            closeClusterWideCursor(db, res);
+        },
+
+        removeShard: function(db, collName) {
+            // Use a document in config.testLocks as a lock so only one removeShard runs at a time.
+            const testLocksColl = db.getSiblingDB("config").testLocks;
+            if (!testLocksColl.insert({_id: "removeShard"}).nInserted) {
+                return;  // Nothing was inserted, so the lock is presumably held by another thread.
+            }
+            // Try each shard from 'this.shardList' in order, stopping once one reports state "started".
+            for (let shardIdx = 0; shardIdx < this.shardList.length; ++shardIdx) {
+                const shardName = this.shardList[shardIdx]._id;
+                if (db.adminCommand({removeShard: shardName}).state === "started") {
+                    break;
+                }
+            }
+            // Remove the lock document so that other threads can call removeShard.
+            assert.commandWorked(testLocksColl.remove({_id: "removeShard"}));
+        },
+
+        addShard: function addShard(db, collName) {  // Issue addShard for a random 'shardList' entry.
+            const shardIdx = randomInt(this.shardList.length);
+            const shardEntry = this.shardList[shardIdx];
+            db.adminCommand({addShard: shardEntry.host, name: shardEntry._id});  // Result not checked.
+        },
+
+        init: function(db, collName) {
+            // Do nothing. This is only used to randomize the first action taken by each worker.
+        }
+    };
+
+    const transitionProbabilities =
+        {runChangeStream: 0.25, runCurrentOp: 0.25, removeShard: 0.25, addShard: 0.25};
+    var transitions = {  // Every state shares one table: each non-init state is equally likely next.
+        init: transitionProbabilities,
+        runChangeStream: transitionProbabilities,
+        runCurrentOp: transitionProbabilities,
+        removeShard: transitionProbabilities,
+        addShard: transitionProbabilities
+    };
+
+    // The 'teardown' function is run once by the parent thread before the cluster is destroyed, but
+    // after the worker threads have been reaped. The 'this' argument is bound as '$config.data'.
+    function teardown(db, collName, cluster) {
+        // Clear the 'draining' field on all config.shards documents so we don't impact later tests.
+        db.getSiblingDB("config").shards.update({}, {$unset: {draining: 1}}, {multi: true});
+        // Before the ShardingTest shuts down, retry addShard for every shard until it reports ok.
+        for (let shardEntry of this.shardList) {
+            assert.soon(() => db.adminCommand({addShard: shardEntry.host, name: shardEntry._id}).ok,
+                        `failed to add shard ${shardEntry._id} back into cluster at end of test`);
+        }
+    }
+
+    return {  // The workload definition that is assigned to '$config'.
+        threadCount: 100,
+        iterations: 1000,
+        startState: "init",  // 'init' does nothing; see 'states.init'.
+        states: states,
+        transitions: transitions,
+        setup: setup,
+        teardown: teardown
+    };
+})();
author	Bernard Gorman <bernard.gorman@mongodb.com>	2019-09-13 10:12:39 +0000
committer	evergreen <evergreen@mongodb.com>	2019-09-13 10:12:39 +0000
commit	89d5c8b10e40648a403f12a55bcb66f2f5bef384 (patch)
tree	3674111f238de4c3bbd7b0ab717cac8bccbb864d /jstests
parent	11eaee8aace14a3b8ba1d7c3ab462e8badc8ffee (diff)
download	mongo-89d5c8b10e40648a403f12a55bcb66f2f5bef384.tar.gz