Diffstat (limited to 'jstests')
-rw-r--r--  jstests/noPassthrough/minvalid.js                 7
-rw-r--r--  jstests/replsets/clean_shutdown_oplog_state.js   24
-rw-r--r--  jstests/replsets/oplog_replay_on_startup.js     419
-rw-r--r--  jstests/replsets/oplog_truncated_on_recovery.js  45
-rw-r--r--  jstests/replsets/slave_delay_clean_shutdown.js   61
5 files changed, 525 insertions(+), 31 deletions(-)
diff --git a/jstests/noPassthrough/minvalid.js b/jstests/noPassthrough/minvalid.js
index 6f22e65e2ca..d31f6d58da7 100644
--- a/jstests/noPassthrough/minvalid.js
+++ b/jstests/noPassthrough/minvalid.js
@@ -21,9 +21,10 @@ var lastOp = local.oplog.rs.find().sort({$natural: -1}).limit(1).next();
printjson(lastOp);
print("3: change minvalid");
-// primaries don't populate minvalid by default
-local.replset.minvalid.insert(
- {ts: new Timestamp(lastOp.ts.t, lastOp.ts.i + 1), h: new NumberLong("1234567890")});
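+// Upsert so this works whether or not a minvalid document already exists.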
+assert.writeOK(local.replset.minvalid.update(
+ {},
+ {$set: {ts: new Timestamp(lastOp.ts.t, lastOp.ts.i + 1), h: new NumberLong("1234567890")}},
+ {upsert: true}));
printjson(local.replset.minvalid.findOne());
print("4: restart");
diff --git a/jstests/replsets/clean_shutdown_oplog_state.js b/jstests/replsets/clean_shutdown_oplog_state.js
index 51dba43ff98..3a856db1245 100644
--- a/jstests/replsets/clean_shutdown_oplog_state.js
+++ b/jstests/replsets/clean_shutdown_oplog_state.js
@@ -57,9 +57,10 @@
var conn = MongoRunner.runMongod(options);
assert.neq(null, conn, "secondary failed to start");
- // Following a clean shutdown of a 3.2 node, the oplog must exactly match the applied
- // operations. Additionally, the begin field must not be in the minValid document and the ts
- // must match the top of the oplog (SERVER-25353).
+ // Following clean shutdown of a node, the oplog must exactly match the applied operations.
+ // Additionally, the begin field must not be in the minValid document, the ts must match the
+ // top of the oplog (SERVER-25353), and the oplogDeleteFromPoint must be null (SERVER-7200 and
+ // SERVER-25071).
var oplogDoc = conn.getCollection('local.oplog.rs')
.find({ns: 'test.coll'})
.sort({$natural: -1})
@@ -68,9 +69,20 @@
var minValidDoc =
conn.getCollection('local.replset.minvalid').find().sort({$natural: -1}).limit(1)[0];
printjson({oplogDoc: oplogDoc, collDoc: collDoc, minValidDoc: minValidDoc});
- assert.eq(collDoc._id, oplogDoc.o._id);
- assert(!('begin' in minValidDoc), 'begin in minValidDoc');
- assert.eq(minValidDoc.ts, oplogDoc.ts);
+ try {
+ assert.eq(collDoc._id, oplogDoc.o._id);
+ assert(!('begin' in minValidDoc), 'begin in minValidDoc');
+ assert.eq(minValidDoc.ts, oplogDoc.ts);
+ if ('oplogDeleteFromPoint' in minValidDoc) {
+ // If present it must be the null timestamp.
+ assert.eq(minValidDoc.oplogDeleteFromPoint, Timestamp());
+ }
+ } catch (e) {
+ jsTest.log(
+ "Look above and make sure clean shutdown finished without resorting to SIGKILL." +
+ "\nUnfortunately that currently doesn't fail the test.");
+ throw e;
+ }
rst.stopSet();
})();
diff --git a/jstests/replsets/oplog_replay_on_startup.js b/jstests/replsets/oplog_replay_on_startup.js
new file mode 100644
index 00000000000..0864c781d0c
--- /dev/null
+++ b/jstests/replsets/oplog_replay_on_startup.js
@@ -0,0 +1,419 @@
+// SERVER-7200 On startup, replica set nodes delete oplog state past the oplog delete point and
+// apply any remaining unapplied ops before coming up as a secondary.
+//
+// @tags: [requires_persistence]
+(function() {
+ "use strict";
+
+ var ns = "test.coll";
+
+ var rst = new ReplSetTest({
+ nodes: 1,
+ });
+
+ rst.startSet();
+ rst.initiate();
+
+ var conn = rst.getPrimary(); // Waits for PRIMARY state.
+ var term = conn.getCollection('local.oplog.rs').find().sort({$natural: -1}).limit(1).next().t;
+ if (typeof(term) == 'undefined') {
+ term = -1; // Use a dummy term for PV0.
+ }
+
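+    // Brings the node up as a standalone and installs the given oplog entries, collection
+    // contents, and minValid document (numbers are Timestamp increments; null means the
+    // null Timestamp), then restarts it as a replica set member and checks that it reaches
+    // expectedState having applied exactly expectedApplied.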
+ function runTest({
+ oplogEntries,
+ collectionContents,
+ deletePoint,
+ begin,
+ minValid,
+ expectedState,
+ expectedApplied,
+ }) {
+ if (term != -1) {
+ term++; // Each test gets a new term on PV1 to ensure OpTimes always move forward.
+ }
+
+ conn = rst.restart(0, {noReplSet: true}); // Restart as a standalone node.
+ assert.neq(null, conn, "failed to restart");
+ var oplog = conn.getCollection('local.oplog.rs');
+ var minValidColl = conn.getCollection('local.replset.minvalid');
+ var coll = conn.getCollection(ns);
+
+ // Reset state to empty.
+ assert.commandWorked(oplog.runCommand('emptycapped'));
+ coll.drop();
+ assert.commandWorked(coll.runCommand('create'));
+
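+        // Map a test-case number to a Timestamp; null maps to the null (unset) Timestamp.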
+ var ts = (num) => num === null ? Timestamp() : Timestamp(1000, num);
+
+ oplogEntries.forEach((num) => {
+ assert.writeOK(oplog.insert({
+ ts: ts(num),
+ t: term,
+ h: NumberLong(1),
+ op: 'i',
+ ns: ns,
+ o: {_id: num},
+ }));
+ });
+
+ collectionContents.forEach((num) => {
+ assert.writeOK(coll.insert({_id: num}));
+ });
+
+ var injectedMinValidDoc = {
+ _id: ObjectId(),
+
+ // minvalid:
+ ts: ts(minValid),
+ t: term,
+
+ // appliedThrough
+ begin: {
+ ts: ts(begin),
+ t: term,
+ },
+
+ oplogDeleteFromPoint: ts(deletePoint),
+ };
+
+ // This weird mechanism is the only way to bypass mongod's attempt to fill in null
+ // Timestamps.
+ assert.writeOK(minValidColl.remove({}));
+ assert.writeOK(minValidColl.update({}, {$set: injectedMinValidDoc}, {upsert: true}));
+ assert.eq(minValidColl.findOne(),
+ injectedMinValidDoc,
+ "If the Timestamps differ, the server may be filling in the null timestamps");
+
+ try {
+ conn = rst.restart(0); // Restart in replSet mode again.
+ } catch (e) {
+ assert.eq(expectedState, 'FATAL', 'node failed to restart: ' + e);
+ return;
+ }
+
+ // Wait for the node to go to SECONDARY if it is able.
+ assert.soon(
+ () =>
+ conn.adminCommand('serverStatus').metrics.repl.apply.attemptsToBecomeSecondary > 0,
+ () => conn.adminCommand('serverStatus').metrics.repl.apply.attemptsToBecomeSecondary);
+
+ var isMaster = conn.adminCommand('ismaster');
+ switch (expectedState) {
+ case 'SECONDARY':
+ // Primary is also acceptable since once a node becomes secondary, it will try to
+ // become primary if it is eligible and has enough votes (which this node does).
+ // This is supposed to test that we reach secondary, not that we stay there.
+ assert(isMaster.ismaster || isMaster.secondary,
+ 'not PRIMARY or SECONDARY: ' + tojson(isMaster));
+
+                // Wait for the node to become primary. This is necessary to avoid the find
+                // below failing with "NotMasterOrSecondary" errors if it happens to run
+                // while the node is in drain mode during the transition to primary.
+ conn = rst.getPrimary();
+ break;
+
+ case 'RECOVERING':
+ assert(!isMaster.ismaster && !isMaster.secondary,
+ 'not in RECOVERING: ' + tojson(isMaster));
+
+ // Restart as a standalone node again so we can read from the collection.
+ conn = rst.restart(0, {noReplSet: true});
+ break;
+
+ case 'FATAL':
+ doassert("server startup didn't fail when it should have");
+ break;
+
+ default:
+ doassert('expectedState ' + expectedState + ' is not supported');
+ }
+
+ // Ensure the oplog has the entries it should have and none that it shouldn't.
+ assert.eq(conn.getCollection('local.oplog.rs')
+ .find({ns: ns, op: 'i'})
+ .sort({$natural: 1})
+ .map((op) => op.o._id),
+ expectedApplied);
+
+ // Ensure that all ops that should have been applied were.
+ conn.setSlaveOk(true);
+ assert.eq(conn.getCollection(ns).find().sort({_id: 1}).map((obj) => obj._id),
+ expectedApplied);
+ }
+
+ //
+ // Normal 3.4 cases
+ //
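+    // States a healthy 3.4 node can be in after a clean shutdown: entries at or past the
+    // delete point are truncated on startup, and any remaining ops past `begin` are applied.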
+
+ runTest({
+ oplogEntries: [1, 2, 3],
+ collectionContents: [1, 2, 3],
+ deletePoint: null,
+ begin: null,
+ minValid: null,
+ expectedState: 'SECONDARY',
+ expectedApplied: [1, 2, 3],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3],
+ collectionContents: [1, 2, 3],
+ deletePoint: null,
+ begin: null,
+ minValid: 2,
+ expectedState: 'SECONDARY',
+ expectedApplied: [1, 2, 3],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3],
+ collectionContents: [1, 2, 3],
+ deletePoint: null,
+ begin: null,
+ minValid: 3,
+ expectedState: 'SECONDARY',
+ expectedApplied: [1, 2, 3],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3],
+ collectionContents: [1, 2, 3],
+ deletePoint: null,
+ begin: 3,
+ minValid: 3,
+ expectedState: 'SECONDARY',
+ expectedApplied: [1, 2, 3],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3],
+ collectionContents: [1, 2, 3],
+ deletePoint: 4,
+ begin: 3,
+ minValid: 3,
+ expectedState: 'SECONDARY',
+ expectedApplied: [1, 2, 3],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3, 4, 5, 6],
+ collectionContents: [1, 2, 3],
+ deletePoint: 4,
+ begin: 3,
+ minValid: 3,
+ expectedState: 'SECONDARY',
+ expectedApplied: [1, 2, 3],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3, /*4,*/ 5, 6],
+ collectionContents: [1, 2, 3],
+ deletePoint: 4,
+ begin: 3,
+ minValid: 3,
+ expectedState: 'SECONDARY',
+ expectedApplied: [1, 2, 3],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3, 4, 5, 6],
+ collectionContents: [1, 2, 3],
+ deletePoint: null,
+ begin: 3,
+ minValid: 3,
+ expectedState: 'SECONDARY',
+ expectedApplied: [1, 2, 3, 4, 5, 6],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3, 4, 5, 6],
+ collectionContents: [1, 2, 3],
+ deletePoint: null,
+ begin: 3,
+ minValid: 6,
+ expectedState: 'SECONDARY',
+ expectedApplied: [1, 2, 3, 4, 5, 6],
+ });
+
+ //
+ // 3.2 -> 3.4 upgrade cases
+ //
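+    // A 3.2 node tracked minValid without a delete point; if the oplog ends before
+    // minValid, the node is inconsistent and must come up in RECOVERING.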
+
+ runTest({
+ oplogEntries: [1, 2, 3],
+ collectionContents: [1, 2, 3],
+ deletePoint: null,
+ begin: 3,
+ minValid: 6,
+ expectedState: 'RECOVERING',
+ expectedApplied: [1, 2, 3],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3, 4, 5],
+ collectionContents: [1, 2, 3],
+ deletePoint: null,
+ begin: 3,
+ minValid: 6,
+ expectedState: 'RECOVERING',
+ expectedApplied: [1, 2, 3, 4, 5],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3, 4, 5],
+ collectionContents: [1, 2, 3, 4, 5],
+ deletePoint: null,
+ begin: null,
+ minValid: 6,
+ expectedState: 'RECOVERING',
+ expectedApplied: [1, 2, 3, 4, 5],
+ });
+
+ //
+ // 3.4 -> 3.2 -> 3.4 downgrade/reupgrade cases
+ //
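+    // 3.2 ignores the oplogDeleteFromPoint written by 3.4, so after a round trip the
+    // delete point may be stale; a delete point at or before `begin` must not truncate
+    // already-applied entries.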
+
+ runTest({
+ oplogEntries: [1, 2, 3],
+ collectionContents: [1, 2, 3],
+ deletePoint: 4,
+ begin: 3,
+ minValid: 6,
+ expectedState: 'RECOVERING',
+ expectedApplied: [1, 2, 3],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3, 4, 5],
+ collectionContents: [1, 2, 3],
+ deletePoint: 4,
+ begin: 3,
+ minValid: 6,
+ expectedState: 'RECOVERING',
+ expectedApplied: [1, 2, 3],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3, /*4,*/ 5, 6],
+ collectionContents: [1, 2, 3],
+ deletePoint: 4,
+ begin: 3,
+ minValid: 6,
+ expectedState: 'RECOVERING',
+ expectedApplied: [1, 2, 3],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3],
+ collectionContents: [1, 2, 3],
+ deletePoint: 2,
+ begin: null,
+ minValid: 3,
+ expectedState: 'SECONDARY',
+ expectedApplied: [1, 2, 3],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3],
+ collectionContents: [1, 2, 3],
+ deletePoint: 2,
+ begin: 3,
+ minValid: 6,
+ expectedState: 'RECOVERING',
+ expectedApplied: [1, 2, 3],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3, 4, 5],
+ collectionContents: [1, 2, 3],
+ deletePoint: 2,
+ begin: 3,
+ minValid: 6,
+ expectedState: 'RECOVERING',
+ expectedApplied: [1, 2, 3, 4, 5],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3, 4, 5, 6],
+ collectionContents: [1, 2, 3],
+ deletePoint: 2,
+ begin: 3,
+ minValid: 6,
+ expectedState: 'SECONDARY',
+ expectedApplied: [1, 2, 3, 4, 5, 6],
+ });
+
+ //
+ // These states should be impossible to get into.
+ //
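+    // Startup either fasserts (FATAL) or, for states it can still make sense of,
+    // recovers to SECONDARY anyway.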
+
+ runTest({
+ oplogEntries: [1, 2, 3],
+ collectionContents: [1, 2, 3, 4],
+ deletePoint: null,
+ begin: 4,
+ minValid: null, // doesn't matter.
+ expectedState: 'FATAL',
+ });
+
+ runTest({
+ oplogEntries: [4, 5, 6],
+ collectionContents: [1, 2],
+ deletePoint: 2,
+ begin: 3,
+ minValid: null, // doesn't matter.
+ expectedState: 'FATAL',
+ });
+
+ runTest({
+ oplogEntries: [4, 5, 6],
+ collectionContents: [1, 2],
+ deletePoint: null,
+ begin: 3,
+ minValid: null, // doesn't matter.
+ expectedState: 'FATAL',
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3, 4, 5, 6],
+ collectionContents: [1, 2, 3],
+ deletePoint: 2,
+ begin: 3,
+ minValid: 3,
+ expectedState: 'SECONDARY',
+ expectedApplied: [1, 2, 3, 4, 5, 6],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3, 4, 5, 6],
+ collectionContents: [1, 2, 3, 4, 5],
+ deletePoint: null,
+ begin: 5,
+ minValid: 3,
+ expectedState: 'SECONDARY',
+ expectedApplied: [1, 2, 3, 4, 5, 6],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3, 4, 5, 6],
+ collectionContents: [1, 2, 3, 4, 5],
+ deletePoint: null,
+ begin: 5,
+ minValid: null,
+ expectedState: 'SECONDARY',
+ expectedApplied: [1, 2, 3, 4, 5, 6],
+ });
+
+ runTest({
+ oplogEntries: [1, 2, 3, 4, 5],
+ collectionContents: [1],
+ deletePoint: 4,
+ begin: 1,
+ minValid: 3,
+ expectedState: 'SECONDARY',
+ expectedApplied: [1, 2, 3],
+ });
+
+ rst.stopSet();
+})();
diff --git a/jstests/replsets/oplog_truncated_on_recovery.js b/jstests/replsets/oplog_truncated_on_recovery.js
index 96be5865cc3..4d469178691 100644
--- a/jstests/replsets/oplog_truncated_on_recovery.js
+++ b/jstests/replsets/oplog_truncated_on_recovery.js
@@ -1,10 +1,10 @@
/**
- * This test will ensure that a failed a batch apply will remove the any oplog
+ * This test will ensure that recovery from a failed batch application will remove the oplog
* entries from that batch.
*
* To do this we:
* -- Create single node replica set
- * -- Set minvalid manually on primary way ahead (5 minutes)
+ * -- Set minvalid manually on primary way ahead (5 days)
* -- Write some oplog entries newer than minvalid.start
* -- Ensure restarted primary comes up in recovering and truncates the oplog
* -- Success!
@@ -40,35 +40,36 @@
// Write op
log(assert.writeOK(testDB.foo.save({_id: 1, a: 1}, {writeConcern: {w: 1}})));
- // Set minvalid to something far in the future for the current primary, to
- // simulate recovery.
- // Note: This is so far in the future (5 days) that it will never become
- // secondary.
+ // Set minvalid to something far in the future for the current primary, to simulate recovery.
+ // Note: This is so far in the future (5 days) that it will never become secondary.
var farFutureTS = new Timestamp(
Math.floor(new Date().getTime() / 1000) + (60 * 60 * 24 * 5 /* in five days */), 0);
var rsgs = assert.commandWorked(localDB.adminCommand("replSetGetStatus"));
log(rsgs);
var primaryOpTime = rsgs.members[0].optime;
- var primaryLastTS = rsgs.members[0].optime.ts;
- log(primaryLastTS);
+ log(primaryOpTime);
// Set the start of the failed batch
- primaryOpTime.ts = new Timestamp(primaryOpTime.ts.t, primaryOpTime.ts.i + 1);
+ // TODO this test should restart in stand-alone mode to futz with the state rather than trying
+ // to do it on a running primary.
- log(primaryLastTS);
jsTest.log("future TS: " + tojson(farFutureTS) + ", date:" + tsToDate(farFutureTS));
- // We do an update in case there is a minvalid document on the primary
- // already.
- // If the doc doesn't exist then upsert:true will create it, and the
- // writeConcern ensures
- // that update returns details of the write, like whether an update or
- // insert was performed.
- log(assert.writeOK(
- minvalidColl.update({},
- {ts: farFutureTS, t: NumberLong(-1), begin: primaryOpTime},
- {upsert: true, writeConcern: {w: 1}})));
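+ // divergedTS is one increment past the primary's last applied op: the first entry of the
+ // simulated failed batch, which recovery must truncate.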
+ var divergedTS = new Timestamp(primaryOpTime.ts.t, primaryOpTime.ts.i + 1);
+ // We do an update in case there is a minvalid document on the primary already.
+ // If the doc doesn't exist then upsert:true will create it, and the writeConcern ensures
+ // that update returns details of the write, like whether an update or insert was performed.
+ log(assert.writeOK(minvalidColl.update({},
+ {
+ ts: farFutureTS,
+ t: NumberLong(-1),
+ begin: primaryOpTime,
+ oplogDeleteFromPoint: divergedTS
+ },
+ {upsert: true, writeConcern: {w: 1}})));
- log(assert.writeOK(localDB.oplog.rs.insert({_id: 0, ts: primaryOpTime.ts, op: "n", term: -1})));
+ // Insert a diverged oplog entry that will be truncated after restart.
+ log(assert.writeOK(localDB.oplog.rs.insert(
+ {_id: 0, ts: divergedTS, op: "n", h: NumberLong(0), t: NumberLong(-1)})));
log(localDB.oplog.rs.find().toArray());
log(assert.commandWorked(localDB.adminCommand("replSetGetStatus")));
log("restart primary");
@@ -88,7 +89,7 @@
var lastTS = localDB.oplog.rs.find().sort({$natural: -1}).limit(-1).next().ts;
log(localDB.oplog.rs.find().toArray());
- assert.eq(primaryLastTS, lastTS);
+ assert.eq(primaryOpTime.ts, lastTS);
return true;
});
diff --git a/jstests/replsets/slave_delay_clean_shutdown.js b/jstests/replsets/slave_delay_clean_shutdown.js
new file mode 100644
index 00000000000..db08dfab228
--- /dev/null
+++ b/jstests/replsets/slave_delay_clean_shutdown.js
@@ -0,0 +1,61 @@
+// SERVER-21118 don't hang at shutdown or apply ops too soon with slaveDelay.
+//
+// @tags: [requires_persistence]
+load('jstests/replsets/rslib.js');
+(function() {
+ "use strict";
+
+ var ns = "test.coll";
+
+ var rst = new ReplSetTest({
+ nodes: 2,
+ });
+
+ var conf = rst.getReplSetConfig();
+ conf.members[1].votes = 0;
+ conf.members[1].priority = 0;
+ conf.members[1].hidden = true;
+ conf.members[1].slaveDelay = 0; // Set later.
+
+ rst.startSet();
+ rst.initiate(conf);
+
+ var master = rst.getPrimary(); // Waits for PRIMARY state.
+
+ // Push some ops through before setting slave delay.
+ assert.writeOK(master.getCollection(ns).insert([{}, {}, {}], {writeConcern: {w: 2}}));
+
+ // Set slaveDelay and wait for secondary to receive the change.
+ conf = rst.getReplSetConfigFromNode();
+ conf.version++;
+ conf.members[1].slaveDelay = 24 * 60 * 60;
+ reconfig(rst, conf);
+ assert.soon(() => rst.getReplSetConfigFromNode(1).members[1].slaveDelay > 0,
+ () => rst.getReplSetConfigFromNode(1));
+
+ sleep(2000); // The secondary apply loop only checks for slaveDelay changes once per second.
+ var secondary = rst.getSecondary();
+ const lastOp = getLatestOp(secondary);
+
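+    // New writes should reach the delayed secondary's buffer but must not be applied yet.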
+ assert.writeOK(master.getCollection(ns).insert([{}, {}, {}]));
+ assert.soon(() => secondary.adminCommand('serverStatus').metrics.repl.buffer.count > 0,
+ () => secondary.adminCommand('serverStatus').metrics.repl);
+ assert.neq(getLatestOp(master), lastOp);
+ assert.eq(getLatestOp(secondary), lastOp);
+
+ sleep(2000); // Prevent the test from passing by chance.
+ assert.eq(getLatestOp(secondary), lastOp);
+
+ // Make sure shutdown won't take a long time due to I/O.
+ secondary.adminCommand('fsync');
+
+ // Shutting down shouldn't take long.
+ assert.lt(Date.timeFunc(() => rst.stop(1)), 60 * 1000);
+
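+    // After restart the delayed ops must still not have been applied.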
+ secondary = rst.restart(1);
+ assert.eq(getLatestOp(secondary), lastOp);
+ sleep(2000); // Prevent the test from passing by chance.
+ assert.eq(getLatestOp(secondary), lastOp);
+
+ rst.stopSet();
+})();