1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
// Ensures that all pending move chunk operations get interrupted when the primary of the config
// server steps down and then becomes primary again. Then the migration can be rejoined, and a
// success/failure response still returned to the caller.
//
// Also tests the failure of a migration commit command on the source shard of a migration, due to
// the balancer being interrupted, failing to recover the active migrations, and releasing the
// distributed lock.
//
// Remove requires_fcv_44 tag if SERVER-42273 is backported or 4.4 becomes last-stable.
// @tags: [requires_fcv_44]
load('./jstests/libs/chunk_manipulation_util.js');
(function() {
'use strict';
// The config server replica set is deliberately limited to a single node, so that the step down
// and the following promotion to primary are guaranteed to happen on the same host.
const st = new ShardingTest({config: 1, shards: 2, other: {chunkSize: 1}});
const mongos = st.s0;

// Shard TestDB.TestColl on {Key: 1}, with shard0 as the primary shard of the database.
const dbName = 'TestDB';
const ns = 'TestDB.TestColl';
assert.commandWorked(mongos.adminCommand({enableSharding: dbName}));
st.ensurePrimaryShard(dbName, st.shard0.shardName);
assert.commandWorked(mongos.adminCommand({shardCollection: ns, key: {Key: 1}}));

const coll = mongos.getDB(dbName).TestColl;

// Extra mongod handed to moveChunkParallel; startParallelOps writes its state to it.
const staticMongod = MongoRunner.runMongod({});
/**
 * Runs a moveChunk (via moveChunkParallel, with {Key: 0}) towards 'toShard', pauses it on
 * 'toShard' at the rangeDeletionTaskScheduled step, steps the config server primary down twice
 * while the migration is paused, then unpauses it and expects the migration to succeed.
 *
 * The number of 'TestDB.TestColl' chunks recorded in config.chunks for each shard is asserted
 * after the first step down (1 on 'fromShard', 0 on 'toShard') and after the migration has been
 * joined (0 on 'fromShard', 1 on 'toShard').
 *
 * @param fromShard shard expected to own the chunk before the migration.
 * @param toShard shard the chunk is moved to; the migration is paused on this shard.
 * @param isJumbo passed through as the last argument of moveChunkParallel.
 */
function interruptMoveChunkAndRecover(fromShard, toShard, isJumbo) {
    const pauseStep = migrateStepNames.rangeDeletionTaskScheduled;

    // Number of TestDB.TestColl chunks that config.chunks currently records for 'shard'.
    const chunkCountOn = function(shard) {
        return mongos.getDB('config')
            .chunks.find({ns: 'TestDB.TestColl', shard: shard.shardName})
            .itcount();
    };

    // Forces the current config server primary to step down, then waits for a primary again.
    const stepDownConfigPrimary = function() {
        const configPrimary = st.configRS.getPrimary();
        assert.commandWorked(configPrimary.adminCommand({replSetStepDown: 5, force: true}));

        // Ensure a new primary is found promptly
        st.configRS.getPrimary(30000);
    };

    pauseMigrateAtStep(toShard, pauseStep);

    const awaitMoveChunk = moveChunkParallel(staticMongod,
                                             mongos.host,
                                             {Key: 0},
                                             null,
                                             'TestDB.TestColl',
                                             toShard.shardName,
                                             true /* expectSuccess */,
                                             isJumbo);
    waitForMigrateStep(toShard, pauseStep);

    // Stepdown the primary in order to force the balancer to stop. A timeout of 5 seconds is used
    // for both step down operations, because mongos will retry to find the CSRS primary for up to
    // 20 seconds and we have two successive ones.
    stepDownConfigPrimary();

    // The migration is still paused, so the chunk must not have changed owner yet.
    assert.eq(1, chunkCountOn(fromShard));
    assert.eq(0, chunkCountOn(toShard));

    // At this point, the balancer is in recovery mode. Ensure that stepdown can be done again and
    // the recovery mode interrupted.
    stepDownConfigPrimary();

    unpauseMigrateAtStep(toShard, pauseStep);

    // Ensure that migration succeeded
    awaitMoveChunk();

    assert.eq(0, chunkCountOn(fromShard));
    assert.eq(1, chunkCountOn(toShard));
}
// Scenario 1: the collection starts out with one non-jumbo chunk; move it shard0 -> shard1.
assert.commandWorked(coll.insert({Key: 0, Value: 'Test value'}));
interruptMoveChunkAndRecover(st.shard0, st.shard1, false);

// Scenario 2: add a bunch of docs to this chunk so that it becomes jumbo, then move it back
// shard1 -> shard0 with isJumbo set.
const padding = 'X'.repeat(10000);
const jumboDocCount = 2000;
const bulkInsert = coll.initializeUnorderedBulkOp();
for (let n = 0; n < jumboDocCount; ++n) {
    bulkInsert.insert({Key: 0, Value: padding});
}
assert.commandWorked(bulkInsert.execute());
interruptMoveChunkAndRecover(st.shard1, st.shard0, true);

// Tear down the cluster and the extra mongod.
st.stop();
MongoRunner.stopMongod(staticMongod);
})();
|