diff options
author | Haley Connelly <haley.connelly@mongodb.com> | 2021-06-16 18:24:21 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-07-08 19:10:39 +0000 |
commit | 261539ea6562ecd6c9920ae6f5f4449b27353520 (patch) | |
tree | f74a4f6be0d6d8a1a212dc715c29c3fdabdf7ed3 /jstests/sharding | |
parent | 3364cab736849d175c5d783f34910d3c89d70af0 (diff) | |
download | mongo-261539ea6562ecd6c9920ae6f5f4449b27353520.tar.gz |
SERVER-53350 Expose methods on ReshardingTest fixture for sending replSetStepUp, SIGTERM, and SIGKILL to shards and config server while resharding is running
Diffstat (limited to 'jstests/sharding')
-rw-r--r-- | jstests/sharding/libs/resharding_test_fixture.js | 108 | ||||
-rw-r--r-- | jstests/sharding/reshard_collection_failover_shutdown_basic.js | 56 |
2 files changed, 162 insertions, 2 deletions
diff --git a/jstests/sharding/libs/resharding_test_fixture.js b/jstests/sharding/libs/resharding_test_fixture.js index a6b72fb8e31..05c5d4b2add 100644 --- a/jstests/sharding/libs/resharding_test_fixture.js +++ b/jstests/sharding/libs/resharding_test_fixture.js @@ -31,6 +31,7 @@ var ReshardingTest = class { criticalSectionTimeoutMS: criticalSectionTimeoutMS = 24 * 60 * 60 * 1000 /* 1 day */, periodicNoopIntervalSecs: periodicNoopIntervalSecs = undefined, writePeriodicNoops: writePeriodicNoops = undefined, + enableElections: enableElections = false, } = {}) { // The @private JSDoc comments cause VS Code to not display the corresponding properties and // methods in its autocomplete list. This makes it simpler for test authors to know what the @@ -53,6 +54,8 @@ var ReshardingTest = class { this._periodicNoopIntervalSecs = periodicNoopIntervalSecs; /** @private */ this._writePeriodicNoops = writePeriodicNoops; + /** @private */ + this._enableElections = enableElections; // Properties set by setup(). /** @private */ @@ -91,6 +94,23 @@ var ReshardingTest = class { const mongosOptions = {setParameter: {}}; const configOptions = {setParameter: {}}; const rsOptions = {setParameter: {}}; + const configReplSetTestOptions = {}; + + let nodesPerShard = 2; + let nodesPerConfigRs = 1; + + if (this._enableElections) { + nodesPerShard = 3; + nodesPerConfigRs = 3; + + // Increase the likelihood that writes which aren't yet majority-committed end up + // getting rolled back. + rsOptions.settings = {catchUpTimeoutMillis: 0}; + configReplSetTestOptions.settings = {catchUpTimeoutMillis: 0}; + + rsOptions.setParameter.enableElectionHandoff = 0; + configOptions.setParameter.enableElectionHandoff = 0; + } if (this._minimumOperationDurationMS !== undefined) { configOptions.setParameter.reshardingMinimumOperationDurationMillis = @@ -113,11 +133,12 @@ var ReshardingTest = class { this._st = new ShardingTest({ mongos: 1, mongosOptions, - config: 1, + config: nodesPerConfigRs, configOptions, shards: this._numShards, - rs: {nodes: 2}, + rs: {nodes: nodesPerShard}, rsOptions, + configReplSetTestOptions, manualAddShard: true, }); @@ -145,6 +166,12 @@ var ReshardingTest = class { this._st.s.adminCommand({addShard: shard.host, name: shardName})); shard.shardName = res.shardAdded; } + + // In order to enable random failovers, initialize Random's seed if it has not already been + // done. + if (!Random.isInitialized()) { + Random.setRandomSeed(); + } } /** @private */ @@ -168,6 +195,24 @@ var ReshardingTest = class { return this._recipientShards().map(shard => shard.shardName); } + get configShardName() { + return "config"; + } + + /** @private */ + _allReplSetTests() { + return [ + {shardName: this.configShardName, rs: this._st.configRS}, + ...Array.from({length: this._numShards}, (_, i) => this._st[`shard${i}`]) + ]; + } + + /** @private */ + _getReplSetForShard(shardName) { + const res = this._allReplSetTests().find(shardInfo => shardInfo.shardName === shardName); + return res.rs; + } + /** * Shards a non-existing collection using the specified shard key and chunk ranges. * @@ -697,6 +742,65 @@ var ReshardingTest = class { } /** + * Given the shardName, steps up a secondary (chosen at random) to become the new primary of the + * shard replica set. To force an election on the configsvr rather than a participant shard, use + * shardName = this.configShardName; + */ + stepUpNewPrimaryOnShard(shardName) { + jsTestLog(`ReshardingTestFixture stepping up new primary on shard ${shardName}`); + + const replSet = this._getReplSetForShard(shardName); + let originalPrimary = replSet.getPrimary(); + let secondaries = replSet.getSecondaries(); + + while (secondaries.length > 0) { + // Once the primary is terminated/killed/stepped down, write availability is lost. Avoid + // long periods where the replica set doesn't have write availability by trying to step + // up secondaries until one succeeds. + const newPrimaryIdx = Random.randInt(secondaries.length); + const newPrimary = secondaries[newPrimaryIdx]; + + const res = newPrimary.adminCommand({replSetStepUp: 1}); + if (res.ok === 1) { + replSet.awaitNodesAgreeOnPrimary(); + assert.eq(newPrimary, replSet.getPrimary()); + return; + } + + jsTest.log(`ReshardingTestFixture failed to step up secondary ${newPrimary.host} and` + + ` got error ${tojson(res)}. Will retry on another secondary until all` + + ` secondaries have been exhaused`); + secondaries.splice(newPrimaryIdx, 1); + } + + jsTest.log(`ReshardingTestFixture failed to step up secondaries, trying to step` + + ` original primary back up`); + replSet.stepUp(originalPrimary, {awaitReplicationBeforeStepUp: false}); + } + + killAndRestartPrimaryOnShard(shardName) { + jsTestLog(`ReshardingTestFixture killing and restarting primary on shard ${shardName}`); + + const replSet = this._getReplSetForShard(shardName); + const originalPrimaryConn = replSet.getPrimary(); + + const SIGKILL = 9; + const opts = {allowedExitCode: MongoRunner.EXIT_SIGKILL}; + replSet.restart(originalPrimaryConn, opts, SIGKILL); + } + + shutdownAndRestartPrimaryOnShard(shardName) { + jsTestLog( + `ReshardingTestFixture shutting down and restarting primary on shard ${shardName}`); + + const replSet = this._getReplSetForShard(shardName); + const originalPrimaryConn = replSet.getPrimary(); + + const SIGTERM = 15; + replSet.restart(originalPrimaryConn, {}, SIGTERM); + } + + /** * @returns the timestamp chosen by the resharding operation for cloning. * * Should also be used in tandem with retryableWriteManager when calling this method in a diff --git a/jstests/sharding/reshard_collection_failover_shutdown_basic.js b/jstests/sharding/reshard_collection_failover_shutdown_basic.js new file mode 100644 index 00000000000..46e0159bf67 --- /dev/null +++ b/jstests/sharding/reshard_collection_failover_shutdown_basic.js @@ -0,0 +1,56 @@ +/** + * Tests that reshardCollection succeeds when a participant experiences a failover or clean/unclean + * restart during the operation. + * + * TODO SERVER-58343: re-enable this test. + * @tags: [ + * __TEMPORARILY_DISABLED__, + * requires_fcv_49, + * uses_atclustertime, + * ] + */ +(function() { +"use strict"; + +load("jstests/libs/discover_topology.js"); +load("jstests/sharding/libs/resharding_test_fixture.js"); + +const reshardingTest = new ReshardingTest({numDonors: 2, numRecipients: 2, enableElections: true}); +reshardingTest.setup(); + +const donorShardNames = reshardingTest.donorShardNames; +const recipientShardNames = reshardingTest.recipientShardNames; +const sourceCollection = reshardingTest.createShardedCollection({ + ns: "reshardingDb.coll", + shardKeyPattern: {oldKey: 1}, + chunks: [ + {min: {oldKey: MinKey}, max: {oldKey: 0}, shard: donorShardNames[0]}, + {min: {oldKey: 0}, max: {oldKey: MaxKey}, shard: donorShardNames[1]}, + ], +}); + +assert.commandWorked(sourceCollection.insert([ + {_id: "stays on shard0", oldKey: -10, newKey: -10}, + {_id: "moves to shard0", oldKey: 10, newKey: -10}, + {_id: "moves to shard1", oldKey: -10, newKey: 10}, + {_id: "stays on shard1", oldKey: 10, newKey: 10}, +])); + +reshardingTest.withReshardingInBackground( + { + newShardKeyPattern: {newKey: 1}, + newChunks: [ + {min: {newKey: MinKey}, max: {newKey: 0}, shard: recipientShardNames[0]}, + {min: {newKey: 0}, max: {newKey: MaxKey}, shard: recipientShardNames[1]}, + ], + }, + (tempNs) => { + reshardingTest.stepUpNewPrimaryOnShard(donorShardNames[0]); + + reshardingTest.killAndRestartPrimaryOnShard(recipientShardNames[0]); + + reshardingTest.shutdownAndRestartPrimaryOnShard(recipientShardNames[1]); + }); + +reshardingTest.teardown(); +})(); |