author     Samy Lanka <samy.lanka@mongodb.com>               2021-11-09 17:46:58 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2021-11-18 19:19:41 +0000
commit     7b6775a0ad49992a3054b8bcfcda29914803fc5b (patch)
tree       4e6cbbe8a8c791e2654f6949397949f78678271e
parent     d80a227ecace76abaac8c440bb6214762085a2fa (diff)
download   mongo-7b6775a0ad49992a3054b8bcfcda29914803fc5b.tar.gz
SERVER-59721 Stop using the minValid optime when resolving a sync source
(cherry picked from commit ba5d667c9a3f3105e0d68babc42a5b2d36524062)
-rw-r--r--  etc/backports_required_for_multiversion_tests.yml                          |   8
-rw-r--r--  jstests/noPassthrough/minvalid2.js                                          | 106
-rw-r--r--  jstests/replsets/apply_batch_only_goes_forward.js                           | 108
-rw-r--r--  jstests/replsets/sync_source_selection_ignores_minvalid_after_rollback.js  | 125
-rw-r--r--  jstests/sharding/libs/resharding_test_fixture.js                            |  13
-rw-r--r--  jstests/sharding/resharding_secondary_recovers_temp_ns_metadata.js          |   3
-rw-r--r--  src/mongo/db/repl/README.md                                                 |   4
-rw-r--r--  src/mongo/db/repl/bgsync.cpp                                                |   8
-rw-r--r--  src/mongo/db/repl/topology_coordinator.cpp                                  |   1
9 files changed, 136 insertions, 240 deletions
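Before the per-file hunks, the substance of the change in one place: BackgroundSync::_produce() previously derived a required optime from the minValid document and handed it to the SyncSourceResolver; after this commit it always hands over a null optime. The following shell-style JavaScript model is illustrative only (opTimeGT, the optime shape, and the function names are assumptions of this sketch; the authoritative change is the bgsync.cpp hunk below):

// Illustrative model only; the real logic is the C++ in bgsync.cpp below.
// An optime is modeled as {term: <number>, ts: {t: <secs>, i: <inc>}}.
function opTimeGT(a, b) {
    // Assumed comparator standing in for OpTime::operator>().
    if (a.term !== b.term)
        return a.term > b.term;
    if (a.ts.t !== b.ts.t)
        return a.ts.t > b.ts.t;
    return a.ts.i > b.ts.i;
}

// Before SERVER-59721: candidates had to contain minValid whenever minValid
// was ahead of the last optime this node fetched.
function requiredOpTimeBefore(minValidSaved, lastOpTimeFetched) {
    return opTimeGT(minValidSaved, lastOpTimeFetched) ? minValidSaved : null;
}

// After SERVER-59721: no entry is ever required (null plays the role of a
// default-constructed OpTime()), so a candidate is not rejected for lacking a
// minValid entry that may sit on a divergent branch of history.
function requiredOpTimeAfter(minValidSaved, lastOpTimeFetched) {
    return null;
}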
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index a5f0a2ef3dc..c384a5f0712 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -106,6 +106,10 @@ last-continuous:
test_file: jstests/sharding/resharding_histogram_metrics.js
- ticket: SERVER-60567
test_file: jstests/core/sbe/sbe_cmd.js
+ - ticket: SERVER-59721
+ test_file: jstests/replsets/sync_source_selection_ignores_minvalid_after_rollback.js
+ - ticket: SERVER-59721
+ test_file: jstests/sharding/resharding_secondary_recovers_temp_ns_metadata.js
# Tests that should only be excluded from particular suites should be listed under that suite.
suites:
@@ -372,6 +376,10 @@ last-lts:
test_file: jstests/sharding/resharding_histogram_metrics.js
- ticket: SERVER-60567
test_file: jstests/core/sbe/sbe_cmd.js
+ - ticket: SERVER-59721
+ test_file: jstests/replsets/sync_source_selection_ignores_minvalid_after_rollback.js
+ - ticket: SERVER-59721
+ test_file: jstests/sharding/resharding_secondary_recovers_temp_ns_metadata.js
# Tests that should only be excluded from particular suites should be listed under that suite.
suites:
diff --git a/jstests/noPassthrough/minvalid2.js b/jstests/noPassthrough/minvalid2.js
deleted file mode 100644
index 76dd0650184..00000000000
--- a/jstests/noPassthrough/minvalid2.js
+++ /dev/null
@@ -1,106 +0,0 @@
-/**
- * This checks that rollback does not happen unless we have reached minvalid.
- * 1. make 3-member set w/arb (2)
- * 2. shut down secondary
- * 3. do writes to primary
- * 4. modify primary's minvalid
- * 5. shut down primary
- * 6. start up secondary
- * 7. writes on former secondary (now primary)
- * 8. start up primary
- * 9. check primary does not rollback
- *
- * If all data-bearing nodes in a replica set are using an ephemeral storage engine, the set will
- * not be able to survive a scenario where all data-bearing nodes are down simultaneously. In such a
- * scenario, none of the members will have any data, and upon restart will each look for a member to
- * initial sync from, so no primary will be elected. This test induces such a scenario, so cannot be
- * run on ephemeral storage engines.
- * @tags: [
- * requires_persistence,
- * requires_replication,
- * ]
- */
-
-// Skip db hash check because replset cannot reach consistent state.
-TestData.skipCheckDBHashes = true;
-
-print("1. make 3-member set w/arb (2)");
-var name = "minvalid";
-var replTest = new ReplSetTest({name: name, nodes: 3, oplogSize: 1, waitForKeys: true});
-var host = getHostName();
-
-var nodes = replTest.startSet();
-replTest.initiate({
- _id: name,
- members: [
- {_id: 0, host: host + ":" + replTest.ports[0]},
- {_id: 1, host: host + ":" + replTest.ports[1]},
- {_id: 2, host: host + ":" + replTest.ports[2], arbiterOnly: true}
- ]
-});
-var secondaries = replTest.getSecondaries();
-var primary = replTest.getPrimary();
-var primaryId = replTest.getNodeId(primary);
-var secondary = secondaries[0];
-var secondaryId = replTest.getNodeId(secondary);
-// The default WC is majority and this test can't satisfy majority writes.
-assert.commandWorked(primary.adminCommand(
- {setDefaultRWConcern: 1, defaultWriteConcern: {w: 1}, writeConcern: {w: "majority"}}));
-
-// Wait for primary to detect that the arbiter is up so that it won't step down when we later take
-// the secondary offline.
-replTest.waitForState(replTest.nodes[2], ReplSetTest.State.ARBITER);
-
-var mdb = primary.getDB("foo");
-
-mdb.foo.save({a: 1000});
-replTest.awaitReplication();
-
-print("2: shut down secondary");
-replTest.stop(secondaryId);
-
-print("3: write to primary");
-assert.commandWorked(mdb.foo.insert({a: 1001}, {writeConcern: {w: 1}}));
-
-print("4: modify primary's minvalid");
-var local = primary.getDB("local");
-var lastOp = local.oplog.rs.find().sort({$natural: -1}).limit(1).next();
-printjson(lastOp);
-
-// Overwrite minvalid document to simulate an inconsistent state (as might result from a server
-// crash).
-local.replset.minvalid.update({},
- {
- ts: new Timestamp(lastOp.ts.t, lastOp.ts.i + 1),
- t: NumberLong(-1),
- },
- {upsert: true});
-printjson(local.replset.minvalid.findOne());
-
-print("5: shut down primary");
-replTest.stop(primaryId);
-
-print("6: start up secondary");
-replTest.restart(secondaryId);
-
-print("7: writes on former secondary");
-primary = replTest.getPrimary();
-mdb1 = primary.getDB("foo");
-mdb1.foo.save({a: 1002});
-
-print("8: start up former primary");
-clearRawMongoProgramOutput();
-replTest.restart(primaryId);
-
-print("9: check former primary " + replTest.nodes[primaryId].host +
- " does not select former secondary " + secondary.host + " as sync source");
-replTest.waitForState(replTest.nodes[primaryId], ReplSetTest.State.RECOVERING, 90000);
-
-// Sync source selection will log this message if it does not detect min valid in the sync
-// source candidate's oplog.
-assert.soon(function() {
- return rawMongoProgramOutput().match(
- 'it does not contain the necessary operations for us to reach a consistent state');
-});
-
-replTest.stopSet();
diff --git a/jstests/replsets/apply_batch_only_goes_forward.js b/jstests/replsets/apply_batch_only_goes_forward.js
deleted file mode 100644
index e961750fc3d..00000000000
--- a/jstests/replsets/apply_batch_only_goes_forward.js
+++ /dev/null
@@ -1,108 +0,0 @@
-/**
- * This test will ensure that a failed batch apply will become consistent only when passing
- * the end boundary (minvalid) in subsequent applies.
- *
- * To do this we:
- * -- Set minvalid manually on primary (node0) way ahead (5 days)
- * -- Restart primary (node0)
- * -- Ensure restarted primary (node0) comes up in recovering
- * -- Ensure node0 denylists new primary as a sync source and keeps the old minvalid
- * -- Success!
- *
- * This test requires persistence to test that a restarted primary will stay in the RECOVERING state
- * when minvalid is set to the future. An ephemeral storage engine will not have a minvalid after
- * restarting, so will initial sync in this scenario, invalidating the test.
- * @tags: [requires_persistence]
- */
-
-// Skip db hash check because replset cannot reach consistent state.
-TestData.skipCheckDBHashes = true;
-
-(function() {
-"use strict";
-
-function tsToDate(ts) {
- return new Date(ts.getTime() * 1000);
-}
-
-var replTest =
- new ReplSetTest({name: "apply_batch_only_goes_forward", nodes: [{}, {}, {arbiter: true}]});
-
-var nodes = replTest.startSet();
-replTest.initiate();
-var primary = replTest.getPrimary();
-var pTest = primary.getDB("test");
-var pLocal = primary.getDB("local");
-var mMinvalid = pLocal["replset.minvalid"];
-
-var secondary = replTest.getSecondary();
-var sTest = secondary.getDB("test");
-var sLocal = secondary.getDB("local");
-var sMinvalid = sLocal["replset.minvalid"];
-var stepDownSecs = 30;
-var stepDownCmd = {replSetStepDown: stepDownSecs, force: true};
-
-// Write op
-assert.commandWorked(
- pTest.foo.save({}, {writeConcern: {w: 'majority', wtimeout: ReplSetTest.kDefaultTimeoutMS}}));
-replTest.waitForState(secondary, ReplSetTest.State.SECONDARY);
-assert.commandWorked(
- pTest.foo.save({}, {writeConcern: {w: 'majority', wtimeout: ReplSetTest.kDefaultTimeoutMS}}));
-
-// Set minvalid to something far in the future for the current primary, to simulate recovery.
-// Note: This is so far in the future (5 days) that it will never become secondary.
-var farFutureTS = new Timestamp(
- Math.floor(new Date().getTime() / 1000) + (60 * 60 * 24 * 5 /* in five days*/), 0);
-
-jsTest.log("future TS: " + tojson(farFutureTS) + ", date:" + tsToDate(farFutureTS));
-// We do an update in case there is a minvalid document on the primary already.
-// If the doc doesn't exist then upsert:true will create it, and the writeConcern ensures
-// that update returns details of the write, like whether an update or insert was performed.
-const minValidUpdate = {
- $set: {ts: farFutureTS}
-};
-jsTestLog("Current minvalid is " + tojson(mMinvalid.findOne()));
-jsTestLog("Updating minValid to: " + tojson(minValidUpdate));
-printjson(assert.commandWorked(mMinvalid.update(
- {},
- minValidUpdate,
- {upsert: true, writeConcern: {w: 1, wtimeout: ReplSetTest.kDefaultTimeoutMS}})));
-
-jsTest.log('Restarting primary ' + primary.host +
- ' with updated minValid. This node will go into RECOVERING upon restart. ' +
- 'Secondary ' + secondary.host + ' will become new primary.');
-clearRawMongoProgramOutput();
-replTest.restart(primary);
-printjson(sLocal.adminCommand("hello"));
-replTest.waitForState(primary, ReplSetTest.State.RECOVERING);
-
-replTest.awaitNodesAgreeOnPrimary();
-// Secondary is now primary... Do a write to advance the optime on the primary so that it will be
-// considered as a sync source - this is more relevant to PV0 because we do not write a new
-// entry to the oplog on becoming primary.
-assert.commandWorked(replTest.getPrimary().getDB("test").foo.save(
- {}, {writeConcern: {w: 1, wtimeout: ReplSetTest.kDefaultTimeoutMS}}));
-
-// Sync source selection will log this message if it does not detect min valid in the sync
-// source candidate's oplog.
-assert.soon(function() {
- return rawMongoProgramOutput().match(
- 'it does not contain the necessary operations for us to reach a consistent state');
-});
-
-assert.soon(function() {
- var mv;
- try {
- mv = mMinvalid.findOne();
- } catch (e) {
- return false;
- }
- var msg = "ts !=, " + tojson(farFutureTS) + "(" + tsToDate(farFutureTS) +
- "), mv:" + tojson(mv) + " - " + tsToDate(mv.ts);
- assert.eq(farFutureTS, mv.ts, msg);
- return true;
-});
-
-// Shut down the set and finish the test.
-replTest.stopSet();
-})();
diff --git a/jstests/replsets/sync_source_selection_ignores_minvalid_after_rollback.js b/jstests/replsets/sync_source_selection_ignores_minvalid_after_rollback.js
new file mode 100644
index 00000000000..dbf9dea7636
--- /dev/null
+++ b/jstests/replsets/sync_source_selection_ignores_minvalid_after_rollback.js
@@ -0,0 +1,125 @@
+/**
+ * Tests that the minValid optime being on a divergent branch of history does not impact sync source
+ * selection after rollback. See SERVER-59721 for more details.
+ *
+ * TODO SERVER-49738: remove this test.
+ */
+(function() {
+"use strict";
+load("jstests/libs/fail_point_util.js");
+load('jstests/libs/parallel_shell_helpers.js');
+load('jstests/replsets/rslib.js'); // For syncFrom and awaitOpTime.
+
+// Disable primary catchup since this test relies on new primaries not catching up to other nodes.
+const rst = new ReplSetTest(
+ {name: jsTestName(), nodes: 3, settings: {catchUpTimeoutMillis: 0}, useBridge: true});
+const nodes = rst.startSet();
+rst.initiateWithHighElectionTimeout();
+
+const collName = jsTestName();
+const node0 = rst.getPrimary();
+const node1 = rst.getSecondaries()[0];
+const node2 = rst.getSecondaries()[1];
+
+const node0DB = node0.getDB("test");
+const node0Coll = node0DB.getCollection(collName);
+
+// The default WC is majority and various failpoints used in this test are incompatible with that.
+assert.commandWorked(node0.adminCommand(
+ {setDefaultRWConcern: 1, defaultWriteConcern: {w: 1}, writeConcern: {w: "majority"}}));
+
+// Make sure node 1 syncs from node 0 so that it will replicate entries that will be rolled back.
+syncFrom(node1, node0, rst);
+
+jsTestLog("Do write that will become the new majority commit point");
+assert.commandWorked(
+ node0Coll.insert({_id: "majority committed"}, {writeConcern: {w: "majority"}}));
+
+rst.awaitReplication();
+
+jsTestLog("Disable snapshotting on all nodes");
+
+// Disable snapshotting on all members of the replica set so that further operations do not
+// enter the majority snapshot.
+nodes.forEach(node => assert.commandWorked(node.adminCommand(
+ {configureFailPoint: "disableSnapshotting", mode: "alwaysOn"})));
+
+// Stop replication on all nodes. We do this on node 0 and 1 so that they will vote for other nodes
+// in future elections. We use a different failpoint for node 1 so that it won't switch sync sources
+// when replication is unpaused. We stop replication on node 2 so that it doesn't receive any oplog
+// entries from the diverging branch of history.
+let node2StopRepl = configureFailPoint(node2, "stopReplProducer");
+let node1StopRepl = configureFailPoint(node1, "hangBeforeProcessingSuccessfulBatch");
+let node0StopRepl = configureFailPoint(node0, "stopReplProducer");
+configureFailPoint(node1, "disableMaxSyncSourceLagSecs");
+
+jsTestLog("Do write that will eventually be rolled back");
+
+assert.commandWorked(node0Coll.insert({_id: "diverging point"}));
+
+node1StopRepl.wait();
+node2StopRepl.wait();
+
+assert.commandWorked(node1.adminCommand({clearLog: 'global'}));
+
+jsTestLog("Stepping up node 2");
+
+// Node 2 runs for election. This is needed before node 1 steps up because otherwise it will always
+// lose future elections and will not be considered the proper branch of history.
+const electionShell = startParallelShell(() => {
+ const newPrimary = db.getMongo();
+ const rst = new ReplSetTest(newPrimary.host);
+ rst.stepUp(newPrimary, {awaitReplicationBeforeStepUp: false, awaitWritablePrimary: false});
+}, node2.port);
+
+jsTestLog("Waiting for node 1 to vote in election");
+checkLog.containsJson(node1, 5972100);
+
+jsTestLog("Waiting for node 1 to replicate diverging branch");
+node1StopRepl.off();
+awaitOpTime(node1, node0);
+
+jsTestLog("Waiting for node 2 to be writable primary");
+
+// Wait for parallelShell to exit. This means that node 2 has successfully transitioned to primary.
+electionShell();
+assert.eq(rst.getPrimary(), node2);
+
+jsTestLog("Waiting for node 0 to step down");
+rst.awaitSecondaryNodes(null, [node0]);
+
+// Node 0 won't replicate node 2's new primary oplog entry, so it can elect node 1 again.
+node0StopRepl.wait();
+
+jsTestLog("Stepping node 1 up");
+
+// Step up node 1, which causes an untimestamped write to the minValid collection.
+rst.stepUp(node1, {awaitReplicationBeforeStepUp: false});
+
+jsTestLog("Stepping node 2 up");
+
+// Node 0 votes for node 2 in this election. Node 2 is ahead of node 0 because of the previous
+// election that it won.
+rst.stepUp(node2, {awaitReplicationBeforeStepUp: false});
+
+const node2Coll = node2.getDB("test").getCollection(collName);
+
+node0StopRepl.off();
+node2StopRepl.off();
+
+jsTestLog("Doing a write on the proper branch of history");
+assert.commandWorked(node2Coll.insert({_id: "proper branch of history"}));
+
+jsTestLog("Waiting for node 1 to complete rollback");
+rst.awaitSecondaryNodes();
+
+jsTestLog("Node 1 completed rollback");
+
+// awaitReplication will only succeed if node 1 was able to successfully choose a sync source.
+rst.awaitReplication();
+
+assert.eq(node2Coll.find({_id: "proper branch of history"}).itcount(), 1);
+assert.eq(node2Coll.find({_id: "diverging point"}).itcount(), 0);
+
+rst.stopSet();
+})();
\ No newline at end of file
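For reference while reading the deleted tests above and the new test: the minValid document they manipulate lives in the local database as replset.minvalid. A hypothetical inspection snippet (conn is an assumed direct connection to a replica set member, not part of the tests above):

// Hypothetical snippet; 'conn' is an assumption of this sketch.
const minValidDoc = conn.getDB("local")["replset.minvalid"].findOne();
printjson(minValidDoc);  // e.g. {ts: Timestamp(...), t: NumberLong(...)}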
diff --git a/jstests/sharding/libs/resharding_test_fixture.js b/jstests/sharding/libs/resharding_test_fixture.js
index 4584436f82d..d04edd9e5fe 100644
--- a/jstests/sharding/libs/resharding_test_fixture.js
+++ b/jstests/sharding/libs/resharding_test_fixture.js
@@ -829,10 +829,6 @@ var ReshardingTest = class {
if (res.ok === 1) {
replSet.awaitNodesAgreeOnPrimary();
- // We wait for replication to ensure all nodes have finished their rollback before
- // another round of rollback may be triggered by the test. TODO SERVER-59721: Remove
- // this wait.
- replSet.awaitReplication();
assert.eq(newPrimary, replSet.getPrimary());
return;
}
@@ -846,9 +842,6 @@ var ReshardingTest = class {
jsTest.log(`ReshardingTestFixture failed to step up secondaries, trying to step` +
` original primary back up`);
replSet.stepUp(originalPrimary, {awaitReplicationBeforeStepUp: false});
- // We wait for replication to ensure all nodes have finished their rollback before another
- // round of rollback may be triggered by the test. TODO SERVER-59721: Remove this wait.
- replSet.awaitReplication();
}
killAndRestartPrimaryOnShard(shardName) {
@@ -861,9 +854,6 @@ var ReshardingTest = class {
const opts = {allowedExitCode: MongoRunner.EXIT_SIGKILL};
replSet.restart(originalPrimaryConn, opts, SIGKILL);
replSet.awaitNodesAgreeOnPrimary();
- // We wait for replication to ensure all nodes have finished their rollback before another
- // round of rollback may be triggered by the test. TODO SERVER-59721: Remove this wait.
- replSet.awaitReplication();
}
shutdownAndRestartPrimaryOnShard(shardName) {
@@ -876,9 +866,6 @@ var ReshardingTest = class {
const SIGTERM = 15;
replSet.restart(originalPrimaryConn, {}, SIGTERM);
replSet.awaitNodesAgreeOnPrimary();
- // We wait for replication to ensure all nodes have finished their rollback before another
- // round of rollback may be triggered by the test. TODO SERVER-59721: Remove this wait.
- replSet.awaitReplication();
}
/**
diff --git a/jstests/sharding/resharding_secondary_recovers_temp_ns_metadata.js b/jstests/sharding/resharding_secondary_recovers_temp_ns_metadata.js
index f7360d2430c..b05adaeae81 100644
--- a/jstests/sharding/resharding_secondary_recovers_temp_ns_metadata.js
+++ b/jstests/sharding/resharding_secondary_recovers_temp_ns_metadata.js
@@ -40,12 +40,11 @@ reshardingTest.withReshardingInBackground( //
reshardingTest.stepUpNewPrimaryOnShard(recipientShardNames[0]);
assert.commandWorked(sourceCollection.insert({oldKey: 1, newKey: 2}));
- /* TODO SERVER-59721: Enable tests for update and remove
reshardingTest.stepUpNewPrimaryOnShard(recipientShardNames[0]);
assert.commandWorked(sourceCollection.update({oldKey: 1, newKey: 2}, {$set: {extra: 3}}));
reshardingTest.stepUpNewPrimaryOnShard(recipientShardNames[0]);
- assert.commandWorked(sourceCollection.remove({oldKey: 1, newKey: 2}, {justOne: true})); */
+ assert.commandWorked(sourceCollection.remove({oldKey: 1, newKey: 2}, {justOne: true}));
});
reshardingTest.teardown();
diff --git a/src/mongo/db/repl/README.md b/src/mongo/db/repl/README.md
index 949a2ae4ac3..51ee5fa6b00 100644
--- a/src/mongo/db/repl/README.md
+++ b/src/mongo/db/repl/README.md
@@ -271,10 +271,6 @@ make sure it actually is able to fetch from the sync source candidate’s oplog.
* If the oldest entry in the sync source candidate's oplog is newer than the node's newest entry,
then the node denylists that sync source candidate as well because the candidate is too far
ahead.
-* During initial sync, rollback, or recovery from unclean shutdown, nodes will set a specific
- OpTime, [**`minValid`**](#replication-timestamp-glossary), that they must reach before it is safe
- to read from the node and before the node can transition into `SECONDARY` state. If the secondary
- has a `minValid`, then the sync source candidate is checked for that `minValid` entry.
* The sync source's **RollbackID** is also fetched to be checked after the first batch is returned
by the `OplogFetcher`.
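The check that survives in the README (denylisting a candidate that is too far ahead) can be modeled as below. This is an illustrative sketch, not the C++ SyncSourceResolver; the t and i fields follow the shell's Timestamp(t, i) convention:

// Illustrative sketch of the surviving check: a candidate whose oldest oplog
// entry is newer than our newest entry cannot serve us the operations in
// between, so it is denylisted.
function candidateIsTooFarAhead(myNewestTs, candidateOldestTs) {
    return (candidateOldestTs.t > myNewestTs.t) ||
        (candidateOldestTs.t === myNewestTs.t && candidateOldestTs.i > myNewestTs.i);
}
// The minValid-based check that used to follow this one is the paragraph
// deleted above (SERVER-59721).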
diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp
index fa454a38619..2a839a8c521 100644
--- a/src/mongo/db/repl/bgsync.cpp
+++ b/src/mongo/db/repl/bgsync.cpp
@@ -297,16 +297,10 @@ void BackgroundSync::_produce() {
// find a target to sync from the last optime fetched
{
- OpTime minValidSaved;
- {
- auto opCtx = cc().makeOperationContext();
- minValidSaved = _replicationProcess->getConsistencyMarkers()->getMinValid(opCtx.get());
- }
stdx::lock_guard<Latch> lock(_mutex);
if (_state != ProducerState::Running) {
return;
}
- const auto requiredOpTime = (minValidSaved > _lastOpTimeFetched) ? minValidSaved : OpTime();
lastOpTimeFetched = _lastOpTimeFetched;
if (!_syncSourceHost.empty()) {
LOGV2(21080,
@@ -319,7 +313,7 @@ void BackgroundSync::_produce() {
_replicationCoordinatorExternalState->getTaskExecutor(),
_replCoord,
lastOpTimeFetched,
- requiredOpTime,
+ OpTime(),
[&syncSourceResp](const SyncSourceResolverResponse& resp) { syncSourceResp = resp; });
}
// This may deadlock if called inside the mutex because SyncSourceResolver::startup() calls
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index b45d6cad5e8..25efb7ba448 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -3344,6 +3344,7 @@ void TopologyCoordinator::processReplSetRequestVotes(const ReplSetRequestVotesAr
if (!args.isADryRun()) {
_lastVote.setTerm(args.getTerm());
_lastVote.setCandidateIndex(args.getCandidateIndex());
+ LOGV2_DEBUG(5972100, 0, "Voting yes in election");
}
response->setVoteGranted(true);
}