summaryrefslogtreecommitdiff
path: root/jstests/sharding
diff options
context:
space:
mode:
authorHaley Connelly <haley.connelly@mongodb.com>2021-06-16 18:24:21 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-07-08 19:10:39 +0000
commit261539ea6562ecd6c9920ae6f5f4449b27353520 (patch)
treef74a4f6be0d6d8a1a212dc715c29c3fdabdf7ed3 /jstests/sharding
parent3364cab736849d175c5d783f34910d3c89d70af0 (diff)
downloadmongo-261539ea6562ecd6c9920ae6f5f4449b27353520.tar.gz
SERVER-53350 Expose methods on ReshardingTest fixture for sending replSetStepUp, SIGTERM, and SIGKILL to shards and config server while resharding is running
Diffstat (limited to 'jstests/sharding')
-rw-r--r--jstests/sharding/libs/resharding_test_fixture.js108
-rw-r--r--jstests/sharding/reshard_collection_failover_shutdown_basic.js56
2 files changed, 162 insertions, 2 deletions
diff --git a/jstests/sharding/libs/resharding_test_fixture.js b/jstests/sharding/libs/resharding_test_fixture.js
index a6b72fb8e31..05c5d4b2add 100644
--- a/jstests/sharding/libs/resharding_test_fixture.js
+++ b/jstests/sharding/libs/resharding_test_fixture.js
@@ -31,6 +31,7 @@ var ReshardingTest = class {
criticalSectionTimeoutMS: criticalSectionTimeoutMS = 24 * 60 * 60 * 1000 /* 1 day */,
periodicNoopIntervalSecs: periodicNoopIntervalSecs = undefined,
writePeriodicNoops: writePeriodicNoops = undefined,
+ enableElections: enableElections = false,
} = {}) {
// The @private JSDoc comments cause VS Code to not display the corresponding properties and
// methods in its autocomplete list. This makes it simpler for test authors to know what the
@@ -53,6 +54,8 @@ var ReshardingTest = class {
this._periodicNoopIntervalSecs = periodicNoopIntervalSecs;
/** @private */
this._writePeriodicNoops = writePeriodicNoops;
+ /** @private */
+ this._enableElections = enableElections;
// Properties set by setup().
/** @private */
@@ -91,6 +94,23 @@ var ReshardingTest = class {
const mongosOptions = {setParameter: {}};
const configOptions = {setParameter: {}};
const rsOptions = {setParameter: {}};
+ const configReplSetTestOptions = {};
+
+ let nodesPerShard = 2;
+ let nodesPerConfigRs = 1;
+
+ if (this._enableElections) {
+ nodesPerShard = 3;
+ nodesPerConfigRs = 3;
+
+ // Increase the likelihood that writes which aren't yet majority-committed end up
+ // getting rolled back.
+ rsOptions.settings = {catchUpTimeoutMillis: 0};
+ configReplSetTestOptions.settings = {catchUpTimeoutMillis: 0};
+
+ rsOptions.setParameter.enableElectionHandoff = 0;
+ configOptions.setParameter.enableElectionHandoff = 0;
+ }
if (this._minimumOperationDurationMS !== undefined) {
configOptions.setParameter.reshardingMinimumOperationDurationMillis =
@@ -113,11 +133,12 @@ var ReshardingTest = class {
this._st = new ShardingTest({
mongos: 1,
mongosOptions,
- config: 1,
+ config: nodesPerConfigRs,
configOptions,
shards: this._numShards,
- rs: {nodes: 2},
+ rs: {nodes: nodesPerShard},
rsOptions,
+ configReplSetTestOptions,
manualAddShard: true,
});
@@ -145,6 +166,12 @@ var ReshardingTest = class {
this._st.s.adminCommand({addShard: shard.host, name: shardName}));
shard.shardName = res.shardAdded;
}
+
+ // In order to enable random failovers, initialize Random's seed if it has not already been
+ // done.
+ if (!Random.isInitialized()) {
+ Random.setRandomSeed();
+ }
}
/** @private */
@@ -168,6 +195,24 @@ var ReshardingTest = class {
return this._recipientShards().map(shard => shard.shardName);
}
+    /**
+     * Shard name under which the config server replica set is listed by _allReplSetTests(). Pass
+     * it as the shardName argument of stepUpNewPrimaryOnShard() to target the config server
+     * instead of a participant shard.
+     */
+    get configShardName() {
+        return "config";
+    }
+
+    /**
+     * Lists every replica set in the cluster: first a {shardName, rs} entry for the config
+     * server, followed by this._st.shard0 through this._st.shard<numShards - 1>. Each entry is
+     * expected to expose `shardName` and `rs`, since those are the properties that
+     * _getReplSetForShard() reads.
+     *
+     * @private
+     */
+    _allReplSetTests() {
+        const configEntry = {shardName: this.configShardName, rs: this._st.configRS};
+        const shardEntries =
+            Array.from({length: this._numShards}, (unused, idx) => this._st[`shard${idx}`]);
+        return [configEntry].concat(shardEntries);
+    }
+
+    /**
+     * Looks up the replica set registered under the given shard name.
+     *
+     * @param shardName name of a participant shard, or this.configShardName for the config
+     *     server.
+     * @returns the `rs` property of the matching entry from _allReplSetTests().
+     * @private
+     */
+    _getReplSetForShard(shardName) {
+        const allReplSets = this._allReplSetTests();
+        const shardInfo = allReplSets.find(info => info.shardName === shardName);
+        // find() yields undefined when nothing matches. Fail with a descriptive message rather
+        // than an opaque TypeError on `undefined.rs` when the caller passes an unknown name.
+        assert(shardInfo !== undefined,
+               `ReshardingTestFixture found no replica set for shard ${tojson(shardName)};` +
+                   ` known shards: ${tojson(allReplSets.map(info => info.shardName))}`);
+        return shardInfo.rs;
+    }
+
/**
* Shards a non-existing collection using the specified shard key and chunk ranges.
*
@@ -697,6 +742,65 @@ var ReshardingTest = class {
}
/**
+     * Given the shardName, steps up a secondary (chosen at random) to become the new primary of the
+     * shard replica set. To force an election on the configsvr rather than a participant shard, use
+     * shardName = this.configShardName;
+     *
+     * Secondaries are tried one at a time, in random order, until one of them accepts
+     * replSetStepUp. If every secondary refuses, the original primary is stepped back up.
+     *
+     * @param shardName name of a participant shard, or this.configShardName.
+     */
+    stepUpNewPrimaryOnShard(shardName) {
+        jsTestLog(`ReshardingTestFixture stepping up new primary on shard ${shardName}`);
+
+        const replSet = this._getReplSetForShard(shardName);
+        const originalPrimary = replSet.getPrimary();
+        // Failed candidates are removed below; work on a copy so that the array handed back by
+        // getSecondaries() is never modified, whoever owns it.
+        const candidates = [...replSet.getSecondaries()];
+
+        while (candidates.length > 0) {
+            // Once the primary is terminated/killed/stepped down, write availability is lost. Avoid
+            // long periods where the replica set doesn't have write availability by trying to step
+            // up secondaries until one succeeds.
+            const candidateIdx = Random.randInt(candidates.length);
+            const candidate = candidates[candidateIdx];
+
+            const res = candidate.adminCommand({replSetStepUp: 1});
+            if (res.ok === 1) {
+                replSet.awaitNodesAgreeOnPrimary();
+                assert.eq(candidate, replSet.getPrimary());
+                return;
+            }
+
+            jsTest.log(`ReshardingTestFixture failed to step up secondary ${candidate.host} and` +
+                       ` got error ${tojson(res)}. Will retry on another secondary until all` +
+                       ` secondaries have been exhausted`);
+            candidates.splice(candidateIdx, 1);
+        }
+
+        jsTest.log(`ReshardingTestFixture failed to step up secondaries, trying to step` +
+                   ` original primary back up`);
+        replSet.stepUp(originalPrimary, {awaitReplicationBeforeStepUp: false});
+    }
+
+    /**
+     * Restarts the current primary of the given shard's replica set, passing signal 9 (SIGKILL)
+     * to replSet.restart() so that the node is brought down uncleanly first.
+     *
+     * @param shardName name of a participant shard, or this.configShardName.
+     */
+    killAndRestartPrimaryOnShard(shardName) {
+        jsTestLog(`ReshardingTestFixture killing and restarting primary on shard ${shardName}`);
+
+        const SIGKILL = 9;
+        const shardReplSet = this._getReplSetForShard(shardName);
+        const primaryToKill = shardReplSet.getPrimary();
+
+        // NOTE(review): allowedExitCode presumably tells restart() to tolerate the exit status
+        // left behind by SIGKILL - confirm against the restart() implementation.
+        shardReplSet.restart(primaryToKill, {allowedExitCode: MongoRunner.EXIT_SIGKILL}, SIGKILL);
+    }
+
+    /**
+     * Restarts the current primary of the given shard's replica set, passing signal 15 (SIGTERM)
+     * and no extra options to replSet.restart(), i.e. a clean shutdown followed by a restart.
+     *
+     * @param shardName name of a participant shard, or this.configShardName.
+     */
+    shutdownAndRestartPrimaryOnShard(shardName) {
+        jsTestLog(
+            `ReshardingTestFixture shutting down and restarting primary on shard ${shardName}`);
+
+        const SIGTERM = 15;
+        const shardReplSet = this._getReplSetForShard(shardName);
+        const primaryToStop = shardReplSet.getPrimary();
+
+        shardReplSet.restart(primaryToStop, {}, SIGTERM);
+    }
+
+ /**
* @returns the timestamp chosen by the resharding operation for cloning.
*
* Should also be used in tandem with retryableWriteManager when calling this method in a
diff --git a/jstests/sharding/reshard_collection_failover_shutdown_basic.js b/jstests/sharding/reshard_collection_failover_shutdown_basic.js
new file mode 100644
index 00000000000..46e0159bf67
--- /dev/null
+++ b/jstests/sharding/reshard_collection_failover_shutdown_basic.js
@@ -0,0 +1,56 @@
+/**
+ * Tests that reshardCollection succeeds when a participant experiences a failover or clean/unclean
+ * restart during the operation.
+ *
+ * TODO SERVER-58343: re-enable this test.
+ * @tags: [
+ * __TEMPORARILY_DISABLED__,
+ * requires_fcv_49,
+ * uses_atclustertime,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/discover_topology.js");
+load("jstests/sharding/libs/resharding_test_fixture.js");
+
+// enableElections makes the fixture start 3-node shard and config server replica sets.
+const reshardingTest = new ReshardingTest({numDonors: 2, numRecipients: 2, enableElections: true});
+reshardingTest.setup();
+
+const [firstDonor, secondDonor] = reshardingTest.donorShardNames;
+const [firstRecipient, secondRecipient] = reshardingTest.recipientShardNames;
+
+// The source collection is split at oldKey: 0, with one chunk on each donor.
+const sourceCollection = reshardingTest.createShardedCollection({
+    ns: "reshardingDb.coll",
+    shardKeyPattern: {oldKey: 1},
+    chunks: [
+        {min: {oldKey: MinKey}, max: {oldKey: 0}, shard: firstDonor},
+        {min: {oldKey: 0}, max: {oldKey: MaxKey}, shard: secondDonor},
+    ],
+});
+
+const initialDocs = [
+    {_id: "stays on shard0", oldKey: -10, newKey: -10},
+    {_id: "moves to shard0", oldKey: 10, newKey: -10},
+    {_id: "moves to shard1", oldKey: -10, newKey: 10},
+    {_id: "stays on shard1", oldKey: 10, newKey: 10},
+];
+assert.commandWorked(sourceCollection.insert(initialDocs));
+
+// The resharded collection is split at newKey: 0, with one chunk on each recipient.
+const reshardingOptions = {
+    newShardKeyPattern: {newKey: 1},
+    newChunks: [
+        {min: {newKey: MinKey}, max: {newKey: 0}, shard: firstRecipient},
+        {min: {newKey: 0}, max: {newKey: MaxKey}, shard: secondRecipient},
+    ],
+};
+
+// Callback handed to withReshardingInBackground(): forces a step-up on the first donor, a
+// SIGKILL restart on the first recipient and a SIGTERM restart on the second recipient.
+const injectFailures = (tempNs) => {
+    reshardingTest.stepUpNewPrimaryOnShard(firstDonor);
+
+    reshardingTest.killAndRestartPrimaryOnShard(firstRecipient);
+
+    reshardingTest.shutdownAndRestartPrimaryOnShard(secondRecipient);
+};
+
+reshardingTest.withReshardingInBackground(reshardingOptions, injectFailures);
+
+reshardingTest.teardown();
+})();