summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorA. Jesse Jiryu Davis <jesse@mongodb.com>2020-08-01 08:50:55 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-08-20 23:07:09 +0000
commit88c0265e057f0e5581306f294d1ca2bda19760e4 (patch)
tree2ced55960fb66f096d10a157191522ff6e3749da
parent329d8c517d8b3c3fb4bcb63eecf6064ac9a007cf (diff)
downloadmongo-88c0265e057f0e5581306f294d1ca2bda19760e4.tar.gz
SERVER-48518 Fix rollback via refetch anomaly
(cherry picked from commit eee49c64cdeb8fa95704b9a316b779eb5eb9800c)
-rw-r--r--buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml1
-rw-r--r--etc/backports_required_for_multiversion_tests.yml2
-rw-r--r--jstests/replsets/libs/rollback_test.js41
-rw-r--r--jstests/replsets/libs/rollback_test_deluxe.js4
-rw-r--r--jstests/replsets/rollback_via_refetch_anomaly.js198
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp16
-rw-r--r--src/mongo/db/storage/storage_engine_impl.cpp1
-rw-r--r--src/mongo/db/storage/storage_engine_impl.h1
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp4
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h2
10 files changed, 262 insertions, 8 deletions
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml b/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml
index d2be04d2836..1d59cc2f244 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml
@@ -12,6 +12,7 @@ selector:
- jstests/replsets/apply_prepare_txn_write_conflict_robustness.js
- jstests/replsets/initial_sync_fails_unclean_restart.js
- jstests/replsets/invalidate_sessions_on_stepdown.js
+ - jstests/replsets/rollback_via_refetch_anomaly.js
# This tag file can be created using ./buildscripts/evergreen_gen_multiversion_tests.py
# or downloaded from the evergreen task.
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index d0b3f3ebe62..31cad5c2ff7 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -101,6 +101,8 @@ suites:
replica_sets_multiversion:
- ticket: SERVER-47645
test_file: jstests/replsets/invalidate_sessions_on_stepdown.js
+ - ticket: SERVER-48518
+ test_file: jstests/replsets/rollback_via_refetch_anomaly.js
concurrency_sharded_replication_multiversion:
diff --git a/jstests/replsets/libs/rollback_test.js b/jstests/replsets/libs/rollback_test.js
index b3b8589ed27..64a11b1b3e7 100644
--- a/jstests/replsets/libs/rollback_test.js
+++ b/jstests/replsets/libs/rollback_test.js
@@ -414,6 +414,36 @@ function RollbackTest(name = "RollbackTest", replSet) {
};
/**
+ * Insert on primary until its lastApplied >= the rollback node's. Useful for testing rollback
+ * via refetch, which completes rollback recovery when new lastApplied >= old top of oplog.
+ */
+ const _awaitPrimaryAppliedSurpassesRollbackApplied = function() {
+ log(`Waiting for lastApplied on sync source ${curPrimary.host} to surpass lastApplied` +
+ ` on rollback node ${curSecondary.host}`);
+
+ function lastApplied(node) {
+ const reply = assert.commandWorked(node.adminCommand({replSetGetStatus: 1}));
+ return reply.optimes.appliedOpTime.ts;
+ }
+
+ const rollbackApplied = lastApplied(curSecondary);
+ assert.soon(() => {
+ const primaryApplied = lastApplied(curPrimary);
+ jsTestLog(
+ `lastApplied on sync source ${curPrimary.host}:` +
+ ` ${tojson(primaryApplied)}, lastApplied on rollback node ${curSecondary.host}:` +
+ ` ${tojson(rollbackApplied)}`);
+
+ if (timestampCmp(primaryApplied, rollbackApplied) >= 0) {
+ return true;
+ }
+
+ let crudColl = curPrimary.getDB("test")["awaitPrimaryAppliedSurpassesRollbackApplied"];
+ assert.commandWorked(crudColl.insertOne({}));
+ }, "primary's lastApplied never surpassed rollback node's");
+ };
+
+ /**
* Transition to the second stage of rollback testing, where we isolate the old primary and
* elect the old secondary as the new primary. Then, operations can be performed on the new
* primary so that that optimes diverge and previous operations on the old primary will be
@@ -479,6 +509,13 @@ function RollbackTest(name = "RollbackTest", replSet) {
lastRBID = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid;
+ const isMajorityReadConcernEnabledOnRollbackNode =
+ assert.commandWorked(curSecondary.adminCommand({serverStatus: 1}))
+ .storageEngine.supportsCommittedReads;
+ if (!isMajorityReadConcernEnabledOnRollbackNode) {
+ _awaitPrimaryAppliedSurpassesRollbackApplied();
+ }
+
// The current primary, which is the old secondary, will later become the sync source.
return curPrimary;
};
@@ -524,6 +561,10 @@ function RollbackTest(name = "RollbackTest", replSet) {
return curSecondary;
};
+ this.getTieBreaker = function() {
+ return tiebreakerNode;
+ };
+
this.restartNode = function(nodeId, signal, startOptions, allowedExitCode) {
assert(signal === SIGKILL || signal === SIGTERM, `Received unknown signal: ${signal}`);
assert.gte(nodeId, 0, "Invalid argument to RollbackTest.restartNode()");
diff --git a/jstests/replsets/libs/rollback_test_deluxe.js b/jstests/replsets/libs/rollback_test_deluxe.js
index bbbbcc1fe36..e13bd134010 100644
--- a/jstests/replsets/libs/rollback_test_deluxe.js
+++ b/jstests/replsets/libs/rollback_test_deluxe.js
@@ -464,6 +464,10 @@ function RollbackTestDeluxe(name = "FiveNodeDoubleRollbackTest", replSet) {
* | A
*/
this.transitionToSyncSourceOperationsBeforeRollback = function() {
+ assert(jsTest.options().enableMajorityReadConcern !== false,
+ "Copy _awaitPrimaryAppliedSurpassesRollbackApplied from RollbackTest, or ban this" +
+ " test with enableMajorityReadConcern false");
+
transitionIfAllowed(State.kSyncSourceOpsBeforeRollback);
// Insert one document to ensure rollback will not be skipped.
diff --git a/jstests/replsets/rollback_via_refetch_anomaly.js b/jstests/replsets/rollback_via_refetch_anomaly.js
new file mode 100644
index 00000000000..ac96adeab26
--- /dev/null
+++ b/jstests/replsets/rollback_via_refetch_anomaly.js
@@ -0,0 +1,198 @@
+/**
+ * Rolled-back documents must not be visible when querying a recovered secondary.
+ *
+ * This test attempts to reproduce SERVER-48518. In the following description, 'T' is the tiebreaker
+ * node, 'P1' and 'P2' are primaries in terms 1 and 2.
+ *
+ * - Begin in RollbackTest.kSteadyStateOps with primary P1, all nodes connected:
+ *
+ * T
+ * / \
+ * P1 - P2
+ * primary secondary
+ *
+ * - Insert _id 0 into P1 at timestamp 1.
+ * - Transition to kRollbackOps by disconnecting P1 from P2:
+ *
+ * T
+ * /
+ * P1 P2
+ * primary secondary
+ *
+ * - Insert _id 1 into P1 at timestamp 2.
+ *
+ * TS 1 TS 2
+ * insert 0 insert 1
+ * P1 --------------->
+ * P2 --->
+ *
+ * - Isolate P1 from T, connect P2 to T:
+ *
+ * T
+ * \
+ * P1 P2
+ * primary new primary
+ *
+ * (Same as RollbackTest.transitionToSyncSourceOperationsBeforeRollback(), except do *not* trigger a
+ * stepdown on P1.)
+ *
+ * - Step up P2, which writes a no-op to its oplog at timestamp 3.
+ *
+ * TS 1 TS 2
+ * insert 0 insert 1
+ *
+ * P1 --------------->
+ * P2 ----*
+ * \
+ * *-------------------------->
+ * no-op
+ * TS 3
+ *
+ * - Delete _id 0 and 1 from P1 at timestamp 4.
+ *
+ * TS 1 TS 2 TS 4
+ * insert 0 insert 1 delete 0 and 1
+ *
+ * P1 --------------------------------------------------------------->
+ * P2 ----*
+ * \
+ * *-------------------------->
+ * no-op
+ * TS 3
+ *
+ * - Reconnect P1 to P2 so it rolls back.
+ *
+ * T
+ * \
+ * P1 - P2
+ * rollback new primary
+ *
+ * Rollback via refetch undoes the delete of _id 0 by reinserting _id 0 in P1 with an
+ * untimestamped write. (It can't undo the delete of _id 1, since P2 doesn't have _id 1.)
+ *
+ * Before we fixed SERVER-48518, P1 served queries at lastApplied = top of P2's oplog = TS 3,
+ * which includes _id 0, _id 1, and _id 0 again (it was reinserted with an untimestamped write).
+ * To fix SERVER-48518, P1 won't transition from ROLLBACK until its lastApplied >= max(P1's oplog
+ * top, P2's oplog top) = TS 4.
+ *
+ * - Write to P2 so it advances >= timestamp 4 and satisfies P1's conditions to finish rollback.
+ * - Wait for P1 to finish rollback and transition to SECONDARY.
+ * - Query P1 and check that rolled-back records aren't visible.
+ *
+ * To end the test, RollbackTest.transitionToSteadyStateOperations won't work, we've diverged from
+ * the state it expects, so we end the test manually. Reconnect P1 to T, enable replication on T,
+ * and stop the replica set.
+ *
+ * T
+ * / \
+ * P1 - P2
+ * secondary new primary
+ *
+ * @tags: [
+ * requires_wiredtiger
+ * ]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/core/txns/libs/prepare_helpers.js");
+load("jstests/libs/write_concern_util.js");
+load("jstests/replsets/libs/rollback_test.js");
+
+const rst = new ReplSetTest({
+ nodes: 3,
+ useBridge: true,
+ nodeOptions: {
+ enableMajorityReadConcern: "false",
+ setParameter: {logComponentVerbosity: tojsononeline({replication: 2})}
+ }
+});
+
+rst.startSet();
+const config = rst.getReplSetConfig();
+config.members[2].priority = 0;
+config.settings = {
+ chainingAllowed: false
+};
+rst.initiateWithHighElectionTimeout(config);
+
+const rollbackTest = new RollbackTest(jsTestName(), rst);
+const P1 = rollbackTest.getPrimary();
+const P2 = rollbackTest.getSecondary();
+const T = rollbackTest.getTieBreaker();
+
+jsTestLog(`Node P1: ${P1.host}, P2: ${P2.host}, T: ${T.host}`);
+
+let testDB = P1.getDB(jsTestName());
+const testColl = testDB.getCollection("test");
+
+let reply = assert.commandWorked(testColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}}));
+jsTestLog(`Inserted _id 0 into P1 ${reply.operationTime}`);
+
+rollbackTest.transitionToRollbackOperations();
+reply = assert.commandWorked(testColl.insert({_id: 1}, {"writeConcern": {"w": 1}}));
+jsTestLog(`Inserted _id 1 into P1 ${reply.operationTime}`);
+
+jsTestLog("Isolating P1 from tiebreaker");
+P1.disconnect([T]);
+
+jsTestLog("Reconnecting P2 to the tiebreaker");
+P2.reconnect([T]);
+
+jsTestLog("Step up P2");
+assert.soonNoExcept(() => {
+ const res = P2.adminCommand({replSetStepUp: 1});
+ return res.ok;
+}, "Failed to step up P2", ReplSetTest.kDefaultTimeoutMS);
+// "Transition to primary complete; database writes are now permitted".
+checkLog.containsJson(P2, 21331);
+jsTestLog("P2 stepped up");
+
+reply = assert.commandWorked(testDB.runCommand({delete: "test", deletes: [{q: {}, limit: 0}]}));
+jsTestLog(`Deleted from P1 at ${reply.operationTime}`);
+
+// Ensure P1's lastApplied > P2's, even if P1's set-up entry was written at the same timestamp as
+// P2's delete timestamp.
+assert.soon(() => {
+ testDB.runCommand({insert: "otherCollection", documents: [{}]});
+ function lastApplied(node) {
+ const reply = assert.commandWorked(node.adminCommand({replSetGetStatus: 1}));
+ return reply.optimes.appliedOpTime.ts;
+ }
+ const P1applied = lastApplied(P1);
+ const P2applied = lastApplied(P2);
+ jsTestLog(`P1 lastApplied ${P1applied}, P2 lastApplied ${P2applied}`);
+ return timestampCmp(P1applied, P2applied) > 0;
+}, "P1's lastApplied never surpassed P2's");
+
+jsTestLog("Reconnecting P1 to P2, so P1 rolls back");
+P1.reconnect([P2]);
+// "Rollback using the 'rollbackViaRefetch' method".
+checkLog.containsJson(P1, 21103);
+// "Finding the Common Point".
+checkLog.containsJson(P1, 21682);
+// "We cannot transition to SECONDARY state because our 'lastApplied' optime is less than the
+// initial data timestamp and enableMajorityReadConcern = false".
+checkLog.containsJson(P1, 4851800);
+
+reply = assert.commandWorked(
+ P2.getDB(jsTestName()).runCommand({insert: "anotherCollection", documents: [{}]}));
+jsTestLog(`Inserted into P2 at ${tojson(reply.operationTime)}`);
+
+jsTestLog("Wait for P1 to enter SECONDARY");
+waitForState(P1, ReplSetTest.State.SECONDARY);
+
+// Both counts should be 1. If SERVER-48518 isn't fixed then itCount() = 3: _ids 0, 1, and 0 again!
+jsTestLog("Check collection count");
+let itCount = testColl.find().itcount();
+let fastCount = testColl.count();
+assert.eq(itCount,
+ fastCount,
+ `count: ${fastCount}, itCount: ${itCount}, data: ${tojson(testColl.find().toArray())}`);
+
+jsTestLog("Check succeeded. Ending test.");
+P1.reconnect([T]);
+restartServerReplication(T);
+rst.stopSet();
+}());
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 943768b94b1..1ed89e6211e 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -4772,6 +4772,22 @@ void ReplicationCoordinatorImpl::finishRecoveryIfEligible(OperationContext* opCt
return;
}
+ // Rolling back with eMRC false, we set initialDataTimestamp to max(local oplog top, source's
+ // oplog top), then rollback via refetch. Data is inconsistent until lastApplied >=
+ // initialDataTimestamp.
+ auto initialTs = opCtx->getServiceContext()->getStorageEngine()->getInitialDataTimestamp();
+ if (lastApplied.getTimestamp() < initialTs) {
+ invariant(!serverGlobalParams.enableMajorityReadConcern);
+ LOGV2_DEBUG(4851800,
+ 2,
+ "We cannot transition to SECONDARY state because our 'lastApplied' optime is "
+ "less than the initial data timestamp and enableMajorityReadConcern = false",
+ "minValid"_attr = minValid,
+ "lastApplied"_attr = lastApplied,
+ "initialDataTimestamp"_attr = initialTs);
+ return;
+ }
+
// Execute the transition to SECONDARY.
auto status = setFollowerMode(MemberState::RS_SECONDARY);
if (!status.isOK()) {
diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp
index 221c2378a9a..4f4d33f3f7d 100644
--- a/src/mongo/db/storage/storage_engine_impl.cpp
+++ b/src/mongo/db/storage/storage_engine_impl.cpp
@@ -781,7 +781,6 @@ void StorageEngineImpl::setStableTimestamp(Timestamp stableTimestamp, bool force
}
void StorageEngineImpl::setInitialDataTimestamp(Timestamp initialDataTimestamp) {
- _initialDataTimestamp = initialDataTimestamp;
_engine->setInitialDataTimestamp(initialDataTimestamp);
}
diff --git a/src/mongo/db/storage/storage_engine_impl.h b/src/mongo/db/storage/storage_engine_impl.h
index 5acb225e86d..e1877aa8377 100644
--- a/src/mongo/db/storage/storage_engine_impl.h
+++ b/src/mongo/db/storage/storage_engine_impl.h
@@ -415,7 +415,6 @@ private:
const bool _supportsDocLocking;
const bool _supportsDBLocking;
const bool _supportsCappedCollections;
- Timestamp _initialDataTimestamp = Timestamp::kAllowUnstableCheckpointsSentinel;
std::unique_ptr<RecordStore> _catalogRecordStore;
std::unique_ptr<DurableCatalogImpl> _catalog;
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
index 8d3838d93e1..e0c5cfe6653 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
@@ -2519,10 +2519,6 @@ Timestamp WiredTigerKVEngine::getCheckpointTimestamp() const {
return Timestamp(_getCheckpointTimestamp());
}
-Timestamp WiredTigerKVEngine::getInitialDataTimestamp() const {
- return Timestamp(_initialDataTimestamp.load());
-}
-
std::uint64_t WiredTigerKVEngine::_getCheckpointTimestamp() const {
char buf[(2 * 8 /*bytes in hex*/) + 1 /*nul terminator*/];
invariantWTOK(_conn->query_timestamp(_conn, buf, "get=last_checkpoint"));
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
index 0d6a2b7a900..a2ea4914dbd 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
@@ -317,8 +317,6 @@ public:
Timestamp getOldestTimestamp() const override;
Timestamp getCheckpointTimestamp() const override;
- Timestamp getInitialDataTimestamp() const;
-
/**
* Returns the data file path associated with an ident on disk. Returns boost::none if the data
* file can not be found. This will attempt to locate a file even if the storage engine's own