summaryrefslogtreecommitdiff
path: root/jstests/sharding/refine_collection_shard_key_abort_on_stepup.js
blob: 2059bd227bfce5d95a1d63b90c2dc32b021e8d39 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
//
// Tests that refineCollectionShardKey can be safely aborted on step-up after a failover leads to
// only half of the internal transaction getting replicated. This is a regression test for
// SERVER-48527.
//
// Tag this test as 'requires_find_command' to prevent it from running in the legacy passthrough.
// @tags: [
//   requires_find_command,
//   requires_fcv_44,
//   exclude_from_large_txns
// ]
//

(function() {
'use strict';

load("jstests/libs/fail_point_util.js");
load('jstests/libs/parallel_shell_helpers.js');
load("jstests/replsets/rslib.js");

const st = new ShardingTest({
    shards: 1,
    mongos: 1,
    useBridge: true,
    other: {
        configOptions: {
            setParameter: {
                // Ensure transactions have multiple oplog entries.
                maxNumberOfTransactionOperationsInSingleOplogEntry: 1,
                bgSyncOplogFetcherBatchSize: 1
            }
        }
    }
});
jsTestLog("Reconfig CSRS to have stable primary");
const csrs = st.configRS;
let cfg = csrs.getReplSetConfigFromNode(0);
cfg.settings.electionTimeoutMillis = csrs.kDefaultTimeoutMS;
cfg.settings.catchUpTimeoutMillis = 0;
cfg.settings.chainingAllowed = false;
reconfig(csrs, cfg, true);
waitForConfigReplication(csrs.getPrimary());
csrs.awaitReplication();

const kDbName = jsTestName();
const kCollName = 'foo';
const kNsName = kDbName + '.' + kCollName;

assert.commandWorked(st.s.adminCommand({enableSharding: kDbName}));
assert.commandWorked(st.s.adminCommand({shardCollection: kNsName, key: {_id: 1}}));
assert.commandWorked(st.s.getCollection(kNsName).createIndex({_id: 1, aKey: 1}));

let primary = csrs.getPrimary();
let secondaries = csrs.getSecondaries();
let newPrimary = secondaries[0];
st.s.disconnect(secondaries);

jsTest.log("Stop secondary oplog replication on the extra secondary so it will vote for anyone");
const stopReplProducerFailPoint = configureFailPoint(secondaries[1], 'stopReplProducer');

jsTest.log("Stop secondary oplog replication before the last operation in the transaction");
// The stopReplProducerOnDocument failpoint ensures that secondary stops replicating before
// applying the last operation in the transaction. This depends on the oplog fetcher batch size
// being 1. This also relies on the last operation in the transaction modifying 'config.chunks'.
const stopReplProducerOnDocumentFailPoint = configureFailPoint(
    newPrimary, "stopReplProducerOnDocument", {document: {"applyOps.ns": "config.chunks"}});

jsTestLog("Refining collection shard key in a parallel shell");
let parallelRefineFn = function(ns) {
    assert.commandWorked(db.adminCommand({refineCollectionShardKey: ns, key: {_id: 1, aKey: 1}}));
};
const awaitShell = startParallelShell(funWithArgs(parallelRefineFn, kNsName), st.s.port);

jsTestLog("Wait for the new primary to block on fail point");
stopReplProducerOnDocumentFailPoint.wait();

jsTestLog(`Triggering CSRS failover from ${primary.host} to ${newPrimary.host}`);
assert.commandWorked(newPrimary.adminCommand({replSetStepUp: 1}));
st.s.reconnect(newPrimary);

jsTestLog("Waiting for set to agree on the new primary, " + newPrimary.host);
csrs.awaitNodesAgreeOnPrimary();

jsTestLog("Wait for parallel shell to complete");
awaitShell();

// Make sure we won't apply the whole transaction by any chance.
jsTestLog("Wait for the new primary to stop replication after primary catch-up");
checkLog.contains(newPrimary, "Stopping replication producer");

jsTestLog("Enable replication on the new primary so that it can finish state transition");
stopReplProducerOnDocumentFailPoint.off();
assert.eq(csrs.getPrimary(), newPrimary);

jsTestLog("Re-enable replication on the extra secondary so it can catch up");
stopReplProducerFailPoint.off();
csrs.awaitReplication();

st.stop();
})();