diff options
11 files changed, 260 insertions, 11 deletions
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml b/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml index 87577f23687..1173146fa1e 100644 --- a/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml +++ b/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml @@ -10,6 +10,7 @@ selector: - jstests/replsets/initial_sync_rename_collection.js - jstests/replsets/initial_sync_drop_collection.js - jstests/replsets/step_down_chaining_disabled.js + - jstests/replsets/rollback_via_refetch_anomaly.js executor: config: shell_options: diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index 86a1d4829d1..e0e27c3e764 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -40,6 +40,8 @@ replica_sets_jscore_multiversion_passthrough: test_file: jstests/core/apply_ops_system_dot_views.js - ticket: SERVER-48614 test_file: jstests/core/wildcard_index_partial_index.js +- ticket: SERVER-48518 + test_file: jstests/replsets/rollback_via_refetch_anomaly.js sharding_jscore_multiversion_passthrough: - ticket: SERVER-47773 diff --git a/jstests/replsets/libs/rollback_test.js b/jstests/replsets/libs/rollback_test.js index 7ab5068aa97..b4420dc67b0 100644 --- a/jstests/replsets/libs/rollback_test.js +++ b/jstests/replsets/libs/rollback_test.js @@ -429,12 +429,46 @@ function RollbackTest(name = "RollbackTest", replSet) { }; /** + * Insert on primary until its lastApplied is >= the rollback node's. + */ + const _awaitPrimaryAppliedSurpassesRollbackApplied = function() { + log(`Waiting for lastApplied on sync source ${curPrimary.host} to surpass lastApplied` + + ` on rollback node ${curSecondary.host}`); + + function lastApplied(node) { + const reply = assert.commandWorked(node.adminCommand({replSetGetStatus: 1})); + return reply.optimes.appliedOpTime.ts; + } + + const rollbackApplied = lastApplied(curSecondary); + assert.soon(() => { + const primaryApplied = lastApplied(curPrimary); + jsTestLog( + `lastApplied on sync source ${curPrimary.host}:` + + ` ${tojson(primaryApplied)}, lastApplied on rollback node ${curSecondary.host}:` + + ` ${tojson(rollbackApplied)}`); + + if (timestampCmp(primaryApplied, rollbackApplied) >= 0) { + return true; + } + + let crudColl = curPrimary.getDB("test")["awaitPrimaryAppliedSurpassesRollbackApplied"]; + assert.commandWorked(crudColl.insertOne({})); + }, "primary's lastApplied never surpassed rollback node's"); + }; + + /** * Transition to the second stage of rollback testing, where we isolate the old primary and * elect the old secondary as the new primary. Then, operations can be performed on the new * primary so that that optimes diverge and previous operations on the old primary will be * rolled back. + * + * If awaitPrimaryAppliedSurpassesRollbackApplied, insert on new primary until its lastApplied + * is >= the rollback node's. Useful for testing rollback via refetch, which completes rollback + * recovery when new lastApplied >= old top of oplog. */ - this.transitionToSyncSourceOperationsBeforeRollback = function() { + this.transitionToSyncSourceOperationsBeforeRollback = function( + {awaitPrimaryAppliedSurpassesRollbackApplied = false} = {}) { const start = new Date(); transitionIfAllowed(State.kSyncSourceOpsBeforeRollback); @@ -498,6 +532,10 @@ function RollbackTest(name = "RollbackTest", replSet) { lastRBID = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid; + if (awaitPrimaryAppliedSurpassesRollbackApplied) { + _awaitPrimaryAppliedSurpassesRollbackApplied(); + } + log(`RollbackTest transition to ${curState} took ${(new Date() - start)} ms`); // The current primary, which is the old secondary, will later become the sync source. return curPrimary; @@ -550,6 +588,10 @@ function RollbackTest(name = "RollbackTest", replSet) { return curSecondary; }; + this.getTieBreaker = function() { + return tiebreakerNode; + }; + this.restartNode = function(nodeId, signal, startOptions, allowedExitCode) { assert(signal === SIGKILL || signal === SIGTERM, `Received unknown signal: ${signal}`); assert.gte(nodeId, 0, "Invalid argument to RollbackTest.restartNode()"); diff --git a/jstests/replsets/rollback_via_refetch_anomaly.js b/jstests/replsets/rollback_via_refetch_anomaly.js new file mode 100644 index 00000000000..1a3f192e498 --- /dev/null +++ b/jstests/replsets/rollback_via_refetch_anomaly.js @@ -0,0 +1,194 @@ +/** + * Rolled-back documents must not be visible when querying a recovered secondary. + * + * This test attempts to reproduce SERVER-48518. In the following description, 'T' is the tiebreaker + * node, 'P1' and 'P2' are primaries in terms 1 and 2. + * + * - Begin in RollbackTest.kSteadyStateOps with primary P1, all nodes connected: + * + * T + * / \ + * P1 - P2 + * primary secondary + * + * - Insert _id 0 into P1 at timestamp 1. + * - Transition to kRollbackOps by disconnecting P1 from P2: + * + * T + * / + * P1 P2 + * primary secondary + * + * - Insert _id 1 into P1 at timestamp 2. + * + * TS 1 TS 2 + * insert 0 insert 1 + * P1 ---------------> + * P2 ---> + * + * - Isolate P1 from T, connect P2 to T: + * + * T + * \ + * P1 P2 + * primary new primary + * + * (Same as RollbackTest.transitionToSyncSourceOperationsBeforeRollback(), except do *not* trigger a + * stepdown on P1.) + * + * - Step up P2, which writes a no-op to its oplog at timestamp 3. + * + * TS 1 TS 2 + * insert 0 insert 1 + * + * P1 ---------------> + * P2 ----* + * \ + * *--------------------------> + * no-op + * TS 3 + * + * - Delete _id 0 and 1 from P1 at timestamp 4. + * + * TS 1 TS 2 TS 4 + * insert 0 insert 1 delete 0 and 1 + * + * P1 ---------------------------------------------------------------> + * P2 ----* + * \ + * *--------------------------> + * no-op + * TS 3 + * + * - Reconnect P1 to P2 so it rolls back. + * + * T + * \ + * P1 - P2 + * rollback new primary + * + * Rollback via refetch undoes the delete of _id 0 by reinserting _id 0 in P1 with an + * untimestamped write. (It can't undo the delete of _id 1, since P2 doesn't have _id 1.) + * + * Before we fixed SERVER-48518, P1 served queries at lastApplied = top of P2's oplog = TS 3, + * which includes _id 0, _id 1, and _id 0 again (it was reinserted with an untimestamped write). + * To fix SERVER-48518, P1 won't transition from ROLLBACK until its lastApplied >= max(P1's oplog + * top, P2's oplog top) = TS 4. + * + * - Write to P2 so it advances >= timestamp 4 and satisfies P1's conditions to finish rollback. + * - Wait for P1 to finish rollback and transition to SECONDARY. + * - Query P1 and check that rolled-back records aren't visible. + * + * To end the test, RollbackTest.transitionToSteadyStateOperations won't work, we've diverged from + * the state it expects, so we end the test manually. Reconnect P1 to T, enable replication on T, + * and stop the replica set. + * + * T + * / \ + * P1 - P2 + * secondary new primary + */ + +(function() { +"use strict"; + +load("jstests/core/txns/libs/prepare_helpers.js"); +load("jstests/libs/write_concern_util.js"); +load("jstests/replsets/libs/rollback_test.js"); + +const rst = new ReplSetTest({ + nodes: 3, + useBridge: true, + nodeOptions: { + enableMajorityReadConcern: "false", + setParameter: {logComponentVerbosity: tojsononeline({replication: 2})} + } +}); + +rst.startSet(); +const config = rst.getReplSetConfig(); +config.members[2].priority = 0; +config.settings = { + chainingAllowed: false +}; +rst.initiateWithHighElectionTimeout(config); + +const rollbackTest = new RollbackTest(jsTestName(), rst); +const P1 = rollbackTest.getPrimary(); +const P2 = rollbackTest.getSecondary(); +const T = rollbackTest.getTieBreaker(); + +jsTestLog(`Node P1: ${P1.host}, P2: ${P2.host}, T: ${T.host}`); + +let testDB = P1.getDB(jsTestName()); +const testColl = testDB.getCollection("test"); + +let reply = assert.commandWorked(testColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}})); +jsTestLog(`Inserted _id 0 into P1 ${reply.operationTime}`); + +rollbackTest.transitionToRollbackOperations(); +reply = assert.commandWorked(testColl.insert({_id: 1}, {"writeConcern": {"w": 1}})); +jsTestLog(`Inserted _id 1 into P1 ${reply.operationTime}`); + +jsTestLog("Isolating P1 from tiebreaker"); +P1.disconnect([T]); + +jsTestLog("Reconnecting P2 to the tiebreaker"); +P2.reconnect([T]); + +jsTestLog("Step up P2"); +assert.soonNoExcept(() => { + const res = P2.adminCommand({replSetStepUp: 1}); + return res.ok; +}, "Failed to step up P2", ReplSetTest.kDefaultTimeoutMS); +// "Transition to primary complete; database writes are now permitted". +checkLog.containsJson(P2, 21331); +jsTestLog("P2 stepped up"); + +reply = assert.commandWorked(testDB.runCommand({delete: "test", deletes: [{q: {}, limit: 0}]})); +jsTestLog(`Deleted from P1 at ${reply.operationTime}`); + +// Ensure P1's lastApplied > P2's, even if P1's set-up entry was written at the same timestamp as +// P2's delete timestamp. +assert.soon(() => { + testDB.runCommand({insert: "otherCollection", documents: [{}]}); + function lastApplied(node) { + const reply = assert.commandWorked(node.adminCommand({replSetGetStatus: 1})); + return reply.optimes.appliedOpTime.ts; + } + const P1applied = lastApplied(P1); + const P2applied = lastApplied(P2); + jsTestLog(`P1 lastApplied ${P1applied}, P2 lastApplied ${P2applied}`); + return timestampCmp(P1applied, P2applied) > 0; +}, "P1's lastApplied never surpassed P2's"); + +jsTestLog("Reconnecting P1 to P2, so P1 rolls back"); +P1.reconnect([P2]); +// "Rollback using the 'rollbackViaRefetch' method". +checkLog.containsJson(P1, 21103); +// "Finding the Common Point". +checkLog.containsJson(P1, 21682); +// "We cannot transition to SECONDARY state because our 'lastApplied' optime is less than the +// initial data timestamp and enableMajorityReadConcern = false". +checkLog.containsJson(P1, 4851800); + +reply = assert.commandWorked( + P2.getDB(jsTestName()).runCommand({insert: "anotherCollection", documents: [{}]})); +jsTestLog(`Inserted into P2 at ${tojson(reply.operationTime)}`); + +jsTestLog("Wait for P1 to enter SECONDARY"); +waitForState(P1, ReplSetTest.State.SECONDARY); + +// Both counts should be 1. If SERVER-48518 isn't fixed then itCount() = 3: _ids 0, 1, and 0 again! +jsTestLog("Check collection count"); +let itCount = testColl.find().itcount(); +let fastCount = testColl.count(); +assert.eq(itCount, + fastCount, + `count: ${fastCount}, itCount: ${itCount}, data: ${tojson(testColl.find().toArray())}`); + +jsTestLog("Check succeeded. Ending test."); +P1.reconnect([T]); +restartServerReplication(T); +rst.stopSet(); +}()); diff --git a/jstests/replsets/rollback_via_refetch_survives_nonexistent_collection_drop.js b/jstests/replsets/rollback_via_refetch_survives_nonexistent_collection_drop.js index e9bd8876672..37bb9da1d31 100644 --- a/jstests/replsets/rollback_via_refetch_survives_nonexistent_collection_drop.js +++ b/jstests/replsets/rollback_via_refetch_survives_nonexistent_collection_drop.js @@ -42,7 +42,8 @@ assert.commandWorked(rollbackDB.adminCommand( assert.commandWorked(rollbackDB.runCommand({create: collName})); // Step down the current primary and elect the node that does not have the collection. -rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); +rollbackTest.transitionToSyncSourceOperationsBeforeRollback( + {awaitPrimaryAppliedSurpassesRollbackApplied: true}); jsTestLog("Attempting to roll back."); // Make the old primary rollback against the new primary. This attempt should fail because the diff --git a/jstests/replsets/transactions_after_rollback_via_refetch.js b/jstests/replsets/transactions_after_rollback_via_refetch.js index 80ef4a8ded9..16c1e56b6d6 100644 --- a/jstests/replsets/transactions_after_rollback_via_refetch.js +++ b/jstests/replsets/transactions_after_rollback_via_refetch.js @@ -70,7 +70,8 @@ CommonOps(rollbackTest.getPrimary()); let rollbackNode = rollbackTest.transitionToRollbackOperations(); RollbackOps(rollbackNode); -let syncSourceNode = rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); +let syncSourceNode = rollbackTest.transitionToSyncSourceOperationsBeforeRollback( + {awaitPrimaryAppliedSurpassesRollbackApplied: true}); SyncSourceOps(syncSourceNode); // Wait for rollback to finish. diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 0a85a178ac7..ed9a5753956 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -5122,6 +5122,22 @@ void ReplicationCoordinatorImpl::finishRecoveryIfEligible(OperationContext* opCt return; } + // Rolling back with eMRC false, we set initialDataTimestamp to max(local oplog top, source's + // oplog top), then rollback via refetch. Data is inconsistent until lastApplied >= + // initialDataTimestamp. + auto initialTs = opCtx->getServiceContext()->getStorageEngine()->getInitialDataTimestamp(); + if (lastApplied.getTimestamp() < initialTs) { + invariant(!serverGlobalParams.enableMajorityReadConcern); + LOGV2_DEBUG(4851800, + 2, + "We cannot transition to SECONDARY state because our 'lastApplied' optime is " + "less than the initial data timestamp and enableMajorityReadConcern = false", + "minValid"_attr = minValid, + "lastApplied"_attr = lastApplied, + "initialDataTimestamp"_attr = initialTs); + return; + } + // Execute the transition to SECONDARY. auto status = setFollowerMode(MemberState::RS_SECONDARY); if (!status.isOK()) { diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp index bcded20bb6d..1e3977ee532 100644 --- a/src/mongo/db/storage/storage_engine_impl.cpp +++ b/src/mongo/db/storage/storage_engine_impl.cpp @@ -778,7 +778,6 @@ void StorageEngineImpl::setStableTimestamp(Timestamp stableTimestamp, bool force } void StorageEngineImpl::setInitialDataTimestamp(Timestamp initialDataTimestamp) { - _initialDataTimestamp = initialDataTimestamp; _engine->setInitialDataTimestamp(initialDataTimestamp); } diff --git a/src/mongo/db/storage/storage_engine_impl.h b/src/mongo/db/storage/storage_engine_impl.h index 6ef1a605992..7f658ba910b 100644 --- a/src/mongo/db/storage/storage_engine_impl.h +++ b/src/mongo/db/storage/storage_engine_impl.h @@ -398,7 +398,6 @@ private: const bool _supportsDocLocking; const bool _supportsCappedCollections; - Timestamp _initialDataTimestamp = Timestamp::kAllowUnstableCheckpointsSentinel; std::unique_ptr<RecordStore> _catalogRecordStore; std::unique_ptr<DurableCatalogImpl> _catalog; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index 3fa21ec4f8d..7d98f3c7205 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -2400,10 +2400,6 @@ Timestamp WiredTigerKVEngine::getCheckpointTimestamp() const { return Timestamp(_getCheckpointTimestamp()); } -Timestamp WiredTigerKVEngine::getInitialDataTimestamp() const { - return Timestamp(_initialDataTimestamp.load()); -} - std::uint64_t WiredTigerKVEngine::_getCheckpointTimestamp() const { char buf[(2 * 8 /*bytes in hex*/) + 1 /*nul terminator*/]; invariantWTOK(_conn->query_timestamp(_conn, buf, "get=last_checkpoint")); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h index df0dccc4373..09152ec80fe 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h @@ -323,8 +323,6 @@ public: Timestamp getOldestTimestamp() const override; Timestamp getCheckpointTimestamp() const override; - Timestamp getInitialDataTimestamp() const; - /** * Returns the data file path associated with an ident on disk. Returns boost::none if the data * file can not be found. This will attempt to locate a file even if the storage engine's own |