/**
 * Shuts down the donor primary at several points in the migration critical section, including
 * while the node is executing _configsvrEnsureChunkVersionIsGreaterThan and while it is forcing
 * a filtering metadata refresh.
 *
 * Shutting down a donor shard leads mongos to retry the moveChunk. If the donor is also the
 * config server, the retry can fail waiting for a node matching the read preference when the
 * shard is slow to recover, so this test is incompatible with config shards.
* @tags: [
* does_not_support_stepdowns,
* # Require persistence to restart nodes
* requires_persistence,
* config_shard_incompatible,
* ]
*/
(function() {
'use strict';

// This test shuts down a shard primary.
TestData.skipCheckingUUIDsConsistentAcrossCluster = true;
TestData.skipCheckingIndexesConsistentAcrossCluster = true;
TestData.skipCheckOrphans = true;
TestData.skipCheckShardFilteringMetadata = true;
load('jstests/libs/parallel_shell_helpers.js');
load('jstests/libs/fail_point_util.js');
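
// Returns a [collName, fullNs] pair that is unique within this test run, so that each failpoint
// case below operates on a fresh collection.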
function getNewNs(dbName) {
if (typeof getNewNs.counter == 'undefined') {
getNewNs.counter = 0;
}
getNewNs.counter++;
const collName = "ns" + getNewNs.counter;
return [collName, dbName + "." + collName];
}
const dbName = "test";
const st = new ShardingTest({shards: 2});
const donorShard = st.shard0;
const recipientShard = st.shard1;
assert.commandWorked(
st.s.adminCommand({enableSharding: dbName, primaryShard: donorShard.shardName}));
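
// With the donor as the primary shard, each new collection (and its single initial chunk) is
// created on the donor.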
function testShutDownAfterFailPoint(failPointName) {
const [collName, ns] = getNewNs(dbName);
jsTest.log("Testing with " + tojson(arguments) + " using ns " + ns);
assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {_id: 1}}));
// Insert some docs into the collection.
const numDocs = 1000;
    let bulk = st.s.getDB(dbName).getCollection(collName).initializeUnorderedBulkOp();
    for (let i = 0; i < numDocs; i++) {
bulk.insert({_id: i});
}
assert.commandWorked(bulk.execute());
// Simulate a network error on sending commit to the config server, so that the donor tries to
// recover the commit decision.
configureFailPoint(donorShard.rs.getPrimary(), "migrationCommitNetworkError");
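    // The returned handle is intentionally discarded: runtime failpoints do not survive a process
    // restart, so restarting the primary below clears it.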
// Set the requested failpoint and launch the moveChunk asynchronously.
let failPoint = configureFailPoint(donorShard.rs.getPrimary(), failPointName);
const awaitResult = startParallelShell(
funWithArgs(function(ns, toShardName) {
assert.commandWorked(db.adminCommand({moveChunk: ns, find: {_id: 0}, to: toShardName}));
}, ns, recipientShard.shardName), st.s.port);
jsTest.log("Waiting for moveChunk to reach " + failPointName + " failpoint");
failPoint.wait();
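
    // The moveChunk is now hung at the requested failpoint on the donor.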
let primary = donorShard.rs.getPrimary();
let primary_id = donorShard.rs.getNodeId(primary);
// Ensure we are able to shut down the donor primary by asserting that its exit code is 0.
assert.eq(0, donorShard.rs.stop(primary_id, null, {}, {forRestart: true, waitPid: true}));
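
    // Join the parallel shell: the moveChunk is still expected to succeed, since mongos retries
    // it after the shutdown error (see the header comment) and the commit has already been
    // applied on the config server by the time these failpoints are reached.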
awaitResult();
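    // Restart the former primary so the shard is healthy again for the next test case and for
    // cluster shutdown.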
donorShard.rs.start(primary_id, {}, true /* restart */, true /* waitForHealth */);
}
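
// Exercise the shutdown at each interruptible step of the donor's migration commit recovery.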
testShutDownAfterFailPoint("hangInEnsureChunkVersionIsGreaterThanInterruptible");
testShutDownAfterFailPoint("hangInRefreshFilteringMetadataUntilSuccessInterruptible");
testShutDownAfterFailPoint("hangInPersistMigrateCommitDecisionInterruptible");
testShutDownAfterFailPoint("hangInDeleteRangeDeletionOnRecipientInterruptible");
testShutDownAfterFailPoint("hangInReadyRangeDeletionLocallyInterruptible");
testShutDownAfterFailPoint("hangInAdvanceTxnNumInterruptible");
st.stop();
})();