// Regression test to ensure that we don't crash during a getMore if the client's
// lastKnownCommittedOpTime switches from being ahead of the node's lastCommittedOpTime to behind
// while a tailable awaitData query is running. See SERVER-35239. This also tests that when the
// client's lastKnownCommittedOpTime is behind the node's lastCommittedOpTime, getMore returns early
// with an empty batch.
//
// The test runs a secondary read (getMore) that is blocked on a failpoint while waiting for
// replication. If the storage engine supports snapshot reads, secondary reads do not acquire PBWM
// locks. So in order to not block secondary oplog application while the secondary read is blocked
// on a failpoint, we only run this test with storage engines that support snapshot reads.
// @tags: [requires_fcv_44, requires_snapshot_read]
(function() {
'use strict';
// rslib.js provides stopServerReplication/restartServerReplication and getLastOpTime
// (not visible in this file) -- TODO confirm against jstests/replsets/rslib.js.
load('jstests/replsets/rslib.js');
const name = 'awaitdata_getmore_new_last_committed_optime';
// 5 nodes so that stopping replication on 3 secondaries below leaves only 2 nodes
// (primary + one secondary) advancing -- short of a majority (3 of 5), keeping new
// writes from becoming committed. Chaining is disabled so each secondary syncs
// directly from the primary, making stopServerReplication on a node affect only
// that node.
const replSet = new ReplSetTest({name: name, nodes: 5, settings: {chainingAllowed: false}});
replSet.startSet();
replSet.initiate();
const dbName = 'test';
const collName = 'coll';
const primary = replSet.getPrimary();
const secondaries = replSet.getSecondaries();
// secondaries[0] is the one node that keeps replicating throughout the test; the
// blocked getMore runs against it.
const secondary = secondaries[0];
const primaryDB = primary.getDB(dbName);
// Create capped collection on primary and allow it to be committed. (Tailable
// cursors require a capped collection.)
assert.commandWorked(primaryDB.createCollection(collName, {capped: true, size: 2048}));
replSet.awaitReplication();
replSet.awaitLastOpCommitted();
// Stop data replication on 3 secondaries to prevent writes being committed
// (only 2 of 5 nodes keep advancing, so no majority).
jsTestLog('Stopping replication');
stopServerReplication(secondaries[1]);
stopServerReplication(secondaries[2]);
stopServerReplication(secondaries[3]);
// Write data to primary. w: 2 is satisfiable by the primary plus the one
// still-replicating secondary, but the writes cannot become majority-committed.
for (let i = 0; i < 2; i++) {
assert.commandWorked(primaryDB[collName].insert({_id: i}, {writeConcern: {w: 2}}));
}
// Wait for only secondaries[0] -- the other three have replication stopped.
replSet.awaitReplication(null, null, [secondary]);
jsTestLog('Secondary has replicated data');
jsTestLog('Starting parallel shell');
// Start a parallel shell because we'll be enabling a failpoint that will make the thread hang.
// Note: this closure is serialized and re-executed inside the new shell, so it must
// re-load rslib.js and re-derive its own constants.
let waitForGetMoreToFinish = startParallelShell(() => {
load('jstests/replsets/rslib.js');
const secondary = db.getMongo();
secondary.setSlaveOk();  // allow reads while this node is a secondary
const dbName = 'test';
const collName = 'coll';
const awaitDataDB = db.getSiblingDB('test');
// Create awaitData cursor and get all data written so that a following getMore will have to
// wait for more data.
let cmdRes =
awaitDataDB.runCommand({find: collName, batchSize: 2, awaitData: true, tailable: true});
assert.commandWorked(cmdRes);
assert.gt(cmdRes.cursor.id, NumberLong(0));
assert.eq(cmdRes.cursor.ns, dbName + "." + collName);
assert.eq(cmdRes.cursor.firstBatch.length, 2, tojson(cmdRes));
// Enable failpoint so the getMore below blocks before it decides whether to wait
// for inserts, giving the main thread a window to advance the commit point.
assert.commandWorked(db.adminCommand({
configureFailPoint: 'planExecutorHangBeforeShouldWaitForInserts',
mode: 'alwaysOn',
data: {namespace: dbName + "." + collName}
}));
// Call getMore on awaitData cursor with lastKnownCommittedOpTime ahead of node. This will
// hang until we've disabled the failpoint. Set awaitData timeout "maxTimeMS" to use a large
// timeout so we can test if the getMore command returns early on a stale
// lastKnownCommittedOpTime.
// getLastOpTime presumably returns this node's last applied optime, which is ahead
// of its lastCommittedOpTime because the w:2 writes are not majority-committed yet
// -- TODO confirm against rslib.js.
const lastOpTime = getLastOpTime(secondary);
const cursorId = cmdRes.cursor.id;
cmdRes = awaitDataDB.runCommand({
getMore: cursorId,
collection: collName,
batchSize: NumberInt(2),
maxTimeMS: ReplSetTest.kDefaultTimeoutMS,
lastKnownCommittedOpTime: lastOpTime
});
assert.commandWorked(cmdRes);
assert.eq(cmdRes.cursor.id, cursorId);
assert.eq(cmdRes.cursor.ns, dbName + "." + collName);
// Test that getMore returns early with an empty batch even though there was a new document
// inserted. (The stale lastKnownCommittedOpTime -- now behind the node's
// lastCommittedOpTime -- should make it return without waiting for inserts.)
assert.eq(cmdRes.cursor.nextBatch.length, 0, tojson(cmdRes));
}, secondary.port);
// Ensure that we've hit the failpoint before moving on, by watching for the
// server's exact log line for this failpoint.
checkLog.contains(secondary,
'PlanExecutor - planExecutorHangBeforeShouldWaitForInserts fail point enabled');
// Restart replication on the other nodes so the commit point can advance again.
jsTestLog('Restarting replication');
restartServerReplication(secondaries[1]);
restartServerReplication(secondaries[2]);
restartServerReplication(secondaries[3]);
// Do another write to advance the optime so that the test client's lastKnownCommittedOpTime is
// behind the node's lastCommittedOpTime once all nodes catch up. (Default write
// concern is fine here; awaitLastOpCommitted below does the waiting.)
jsTestLog('Do another write after restarting replication');
assert.commandWorked(primaryDB[collName].insert({_id: 2}));
// Wait until all nodes have committed the last op. At this point in executing the getMore,
// the node's lastCommittedOpTime should now be ahead of the client's lastKnownCommittedOpTime.
replSet.awaitLastOpCommitted();
jsTestLog('All nodes caught up');
// Disable failpoint, releasing the blocked getMore in the parallel shell.
assert.commandWorked(secondary.adminCommand(
{configureFailPoint: 'planExecutorHangBeforeShouldWaitForInserts', mode: 'off'}));
// Join the parallel shell; its internal assertions verify the early-return behavior.
waitForGetMoreToFinish();
jsTestLog('Parallel shell successfully exited');
replSet.stopSet();
})();