diff options
author | Vesselina Ratcheva <vesselina.ratcheva@10gen.com> | 2020-02-27 13:28:15 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-03-04 00:50:46 +0000 |
commit | 35a5d455672e65127a24e7cdb98ea1472124af4a (patch) | |
tree | d2352b685389089e757e52c3d0dae317f7f92d42 | |
parent | 5e607a45d34a4f977341591eec107a7a8a361626 (diff) | |
download | mongo-35a5d455672e65127a24e7cdb98ea1472124af4a.tar.gz |
SERVER-45997 Test resyncing a majority member against a minority node
-rw-r--r-- | jstests/replsets/resync_majority_member.js | 107 | ||||
-rw-r--r-- | src/mongo/db/repl/rs_rollback.cpp | 12 |
2 files changed, 113 insertions, 6 deletions
diff --git a/jstests/replsets/resync_majority_member.js b/jstests/replsets/resync_majority_member.js
new file mode 100644
index 00000000000..265c03d3406
--- /dev/null
+++ b/jstests/replsets/resync_majority_member.js
@@ -0,0 +1,107 @@
+/**
+ * This test resyncs a majority member against a minority node, so that it no longer has
+ * a write it originally helped commit. It then switches primaries and begins a new branch
+ * of history, so that same write is now in the minority. The only remaining member to still
+ * have that write is forced to (try to) roll back, and it crashes as it refuses to roll back
+ * majority-committed writes.
+ *
+ * @tags: [multiversion_incompatible]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/write_concern_util.js");
+
+TestData.skipCheckDBHashes = true;  // the set is not consistent when we shut down the test
+
+const dbName = "testdb";
+const collName = "testcoll";
+
+const name = jsTestName();
+const rst = new ReplSetTest({
+    name: name,
+    nodes: [{}, {}, {rsConfig: {priority: 0}}],
+    useBridge: true,
+    settings: {chainingAllowed: false, catchupTimeoutMillis: 0 /* disable primary catchup */},
+});
+rst.startSet();
+rst.initiateWithHighElectionTimeout();
+
+const primary = rst.getPrimary();
+const primaryDb = primary.getDB(dbName);
+const primaryColl = primaryDb.getCollection(collName);
+assert.commandWorked(primaryColl.insert({"starting": "doc"}, {writeConcern: {w: 3}}));
+
+/**
+ * Node 1: is primary, will roll back (included in the majority)
+ * Node 2: node to roll back against (minority node)
+ * Node 3: node to resync (originally included in majority, resyncs and loses write)
+ */
+
+const rollbackNode = primary;
+const syncSource = rst.getSecondaries()[0];
+let resyncNode = rst.getSecondaries()[1];
+
+// Disable replication on node 2 so that only nodes 1 and 3 have the next write.
+stopServerReplication(syncSource);
+
+const disappearingDoc = {
+    "harry": "houdini"
+};
+assert.commandWorked(primaryColl.insert(disappearingDoc, {writeConcern: {w: "majority"}}));
+
+// Isolate the old primary so it cannot try to pass on its write again.
+rollbackNode.disconnect(syncSource);
+rollbackNode.disconnect(resyncNode);
+
+// Resync the last node against the minority member. We will lose the write on that node.
+resyncNode = rst.restart(resyncNode, {
+    startClean: true,
+    setParameter: {
+        "failpoint.initialSyncHangBeforeFinish": tojson({mode: "alwaysOn"}),
+        "failpoint.forceSyncSourceCandidate":
+            tojson({mode: "alwaysOn", data: {"hostAndPort": syncSource.host}}),
+        "numInitialSyncAttempts": 1
+    }
+});
+
+assert.commandWorked(resyncNode.adminCommand({
+    waitForFailPoint: "initialSyncHangBeforeFinish",
+    timesEntered: 1,
+    maxTimeMS: kDefaultWaitForFailPointTimeout
+}));
+assert.commandWorked(
+    resyncNode.adminCommand({configureFailPoint: "initialSyncHangBeforeFinish", mode: "off"}));
+
+assert.commandWorked(
+    rollbackNode.adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
+rst.waitForState(rollbackNode, ReplSetTest.State.SECONDARY);
+
+restartServerReplication(syncSource);
+
+// Now elect node 2, the minority member.
+assert.commandWorked(syncSource.adminCommand({replSetStepUp: 1}));
+assert.eq(syncSource, rst.getPrimary());
+assert.commandWorked(syncSource.getDB(dbName).getCollection(collName).insert(
+    {"new": "data"}, {writeConcern: {w: "majority"}}));
+
+// Node 1 will have to roll back to rejoin the set. It will crash as it will refuse to roll back
+// majority committed data.
+rollbackNode.reconnect(syncSource);
+rollbackNode.reconnect(resyncNode);
+
+assert.soon(() => {
+    return rawMongoProgramOutput().search(
+               /Invariant.*commonPointOpTime\.getTimestamp\(\) \>\= lastCommittedOpTime\.getTimestamp\(\)/) !==
+        -1;
+});
+
+// Observe that the old write does not exist anywhere in the set.
+syncSource.setSlaveOk();
+resyncNode.setSlaveOk();
+assert.eq(0, syncSource.getDB(dbName)[collName].find(disappearingDoc).itcount());
+assert.eq(0, resyncNode.getDB(dbName)[collName].find(disappearingDoc).itcount());
+
+rst.stopSet();
+})();
\ No newline at end of file diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp index 5ddf035ce51..bc726969362 100644 --- a/src/mongo/db/repl/rs_rollback.cpp +++ b/src/mongo/db/repl/rs_rollback.cpp @@ -1224,19 +1224,19 @@ Status _syncRollback(OperationContext* opCtx, << e.what()); } - OpTime commonPoint = how.commonPoint; + OpTime commonPointOpTime = how.commonPoint; OpTime lastCommittedOpTime = replCoord->getLastCommittedOpTime(); OpTime committedSnapshot = replCoord->getCurrentCommittedSnapshotOpTime(); - LOGV2(21683, "Rollback common point is {commonPoint}", "commonPoint"_attr = commonPoint); + LOGV2(21683, "Rollback common point is {commonPoint}", "commonPoint"_attr = commonPointOpTime); // Rollback common point should be >= the replication commit point. - invariant(commonPoint.getTimestamp() >= lastCommittedOpTime.getTimestamp()); - invariant(commonPoint >= lastCommittedOpTime); + invariant(commonPointOpTime.getTimestamp() >= lastCommittedOpTime.getTimestamp()); + invariant(commonPointOpTime >= lastCommittedOpTime); // Rollback common point should be >= the committed snapshot optime. - invariant(commonPoint.getTimestamp() >= committedSnapshot.getTimestamp()); - invariant(commonPoint >= committedSnapshot); + invariant(commonPointOpTime.getTimestamp() >= committedSnapshot.getTimestamp()); + invariant(commonPointOpTime >= committedSnapshot); try { // It is always safe to increment the rollback ID first, even if we fail to complete |