// Source path: root/jstests/sharding/migration_failure.js
// Blob: 6263d3bd9c32208584b8e8d89370a316e1f7c00a
//
// Tests that migration failures before and after commit correctly recover when possible.
//
// Also checks that the collection version on a source shard updates correctly after a migration.
//

(function() {
'use strict';

/**
 * Repeatedly runs the getShardVersion admin command for `collNs` against `conn` (via
 * assert.soon) until the `global` field of the reply is something other than the string
 * 'UNKNOWN', then returns that `global` value.
 *
 * @param conn connection on which adminCommand() is invoked.
 * @param collNs namespace passed as the getShardVersion command value.
 * @returns the first `global` value observed that is not the string 'UNKNOWN'.
 */
function waitAndGetShardVersion(conn, collNs) {
    let latestGlobalVersion;

    // Each poll records the most recent `global` value so it can be returned once it is known.
    const globalVersionIsKnown = () => {
        const reply = conn.adminCommand({getShardVersion: collNs});
        latestGlobalVersion = reply.global;
        return latestGlobalVersion !== 'UNKNOWN';
    };

    assert.soon(globalVersionIsKnown);

    return latestGlobalVersion;
}

// Two-shard cluster fronted by a single mongos.
const st = new ShardingTest({shards: 2, mongos: 1});

const mongos = st.s0;
const admin = mongos.getDB("admin");
const coll = mongos.getCollection("foo.bar");

// Namespace strings are computed once and reused by every command below.
const dbName = coll.getDB().toString();
const ns = coll.toString();

// Every fail point in this test is toggled on shard0, the shard whose version is inspected.
const shard0Admin = st.shard0.getDB("admin");
const setShard0FailPoint = (name, mode) =>
    assert.commandWorked(shard0Admin.runCommand({configureFailPoint: name, mode: mode}));

assert.commandWorked(admin.runCommand({enableSharding: dbName}));
// The movePrimary reply is only printed, never asserted.
// NOTE(review): presumably this is deliberate best-effort (e.g. shard0 may already be the
// primary) -- confirm before tightening it into an assertion.
printjson(admin.runCommand({movePrimary: dbName, to: st.shard0.shardName}));
assert.commandWorked(admin.runCommand({shardCollection: ns, key: {_id: 1}}));
assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 0}}));

st.printShardingStatus();

jsTest.log("Testing failed migrations...");

// Shard versions observed on shard0 immediately before and after each moveChunk attempt.
let oldVersion;
let newVersion;

// failMigrationCommit -- this creates an error that aborts the migration before the commit
// migration command is sent.
setShard0FailPoint('failMigrationCommit', 'alwaysOn');

oldVersion = waitAndGetShardVersion(st.shard0, ns);

assert.commandFailed(admin.runCommand({moveChunk: ns, find: {_id: 0}, to: st.shard1.shardName}));

newVersion = waitAndGetShardVersion(st.shard0, ns);

assert.eq(oldVersion.t,
          newVersion.t,
          "The shard version major value should not change after a failed migration");
// Split does not cause a shard routing table refresh, but the moveChunk attempt will.
assert.eq(2,
          newVersion.i,
          "The shard routing table should refresh on a failed migration and show the split");

setShard0FailPoint('failMigrationCommit', 'off');

// migrationCommitNetworkError -- mimic migration commit command returning a network error,
// whereupon the config server is queried to determine that this commit was successful.
setShard0FailPoint('migrationCommitNetworkError', 'alwaysOn');

// Run a migration where there will still be chunks in the collection remaining on the shard
// afterwards. This will cause the collection's shardVersion to be bumped higher.
oldVersion = waitAndGetShardVersion(st.shard0, ns);

assert.commandWorked(admin.runCommand({moveChunk: ns, find: {_id: 1}, to: st.shard1.shardName}));

newVersion = waitAndGetShardVersion(st.shard0, ns);

assert.lt(oldVersion.t, newVersion.t, "The major value in the shard version should have increased");
assert.eq(1, newVersion.i, "The minor value in the shard version should be 1");

// Run a migration to move off the shard's last chunk in the collection. The collection's
// shardVersion will be reset.
oldVersion = waitAndGetShardVersion(st.shard0, ns);

assert.commandWorked(admin.runCommand({moveChunk: ns, find: {_id: -1}, to: st.shard1.shardName}));

newVersion = waitAndGetShardVersion(st.shard0, ns);

assert.gt(oldVersion.t,
          newVersion.t,
          "The version prior to the migration should be greater than the reset value");

assert.eq(0, newVersion.t, "The shard version should have reset, but the major value is not zero");
assert.eq(0, newVersion.i, "The shard version should have reset, but the minor value is not zero");

setShard0FailPoint('migrationCommitNetworkError', 'off');

st.stop();
})();