author     Spencer T Brody <spencer@mongodb.com>  2016-11-16 18:00:03 -0500
committer  Spencer T Brody <spencer@mongodb.com>  2016-11-17 16:15:50 -0500
commit     8347e322cd46e8ee847e1730a7e94ea8e3981c53 (patch)
tree       be83dbb5d51b2742c25e2a522ed9313b63dbb668 /jstests
parent     f948e73df5148039b2fc8643a14635d9d982be7a (diff)
download   mongo-8347e322cd46e8ee847e1730a7e94ea8e3981c53.tar.gz
SERVER-27053 Don't acknowledge writes if the term has changed.
Diffstat (limited to 'jstests')
-rw-r--r--  jstests/replsets/write_concern_after_stepdown.js             120
-rw-r--r--  jstests/replsets/write_concern_after_stepdown_and_stepup.js  134
2 files changed, 254 insertions, 0 deletions
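
Both new tests drive the same client-visible check: a w:majority write accepted by a primary
whose term has since changed must come back as a write error rather than being acknowledged,
since the document it inserted is about to be rolled back. A minimal sketch of that check,
reusing the database and collection names from the tests below (the stepdown choreography
around it is what the tests themselves set up):

    // Issued against the stale primary while a newer term exists elsewhere in the set.
    var res = db.getSiblingDB('wMajorityCheck').stepdownAndBackUp.insert(
        {a: 2}, {writeConcern: {w: 'majority'}});
    assert.writeErrorWithCode(res, ErrorCodes.PrimarySteppedDown);
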
diff --git a/jstests/replsets/write_concern_after_stepdown.js b/jstests/replsets/write_concern_after_stepdown.js
new file mode 100644
index 00000000000..c8493ea4fb6
--- /dev/null
+++ b/jstests/replsets/write_concern_after_stepdown.js
@@ -0,0 +1,120 @@
+/*
+ * Tests that heartbeats containing writes from a different branch of history can't cause a stale
+ * primary to incorrectly acknowledge a w:majority write that's about to be rolled back.
+ */
+(function() {
+ 'use strict';
+
+ var name = "writeConcernStepDownAndBackUp";
+ var dbName = "wMajorityCheck";
+ var collName = "stepdownAndBackUp";
+
+ var rst = new ReplSetTest({
+ name: name,
+ nodes: [
+ {},
+ {},
+ {rsConfig: {priority: 0}},
+ ],
+ useBridge: true
+ });
+ var nodes = rst.startSet();
+ rst.initiate();
+
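+ // The replSetTest admin command used below is a test-only command; with waitForMemberState it
+ // blocks until the node reports the requested member state or timeoutMillis expires.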
+ function waitForState(node, state) {
+ assert.soonNoExcept(function() {
+ assert.commandWorked(node.adminCommand(
+ {replSetTest: 1, waitForMemberState: state, timeoutMillis: rst.kDefaultTimeoutMS}));
+ return true;
+ });
+ }
+
+ function waitForPrimary(node) {
+ assert.soon(function() {
+ return node.adminCommand('ismaster').ismaster;
+ });
+ }
+
+ function stepUp(node) {
+ var primary = rst.getPrimary();
+ if (primary != node) {
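+ // replSetStepDown makes the stepping-down node close its connections, so the shell sees the
+ // command fail with a network error; assert.throws accounts for that expected failure.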
+ assert.throws(function() {
+ primary.adminCommand({replSetStepDown: 60 * 5});
+ });
+ }
+ waitForPrimary(node);
+ }
+
+ jsTestLog("Make sure node 0 is primary.");
+ stepUp(nodes[0]);
+ var primary = rst.getPrimary();
+ var secondaries = rst.getSecondaries();
+ assert.eq(nodes[0], primary);
+ // Wait for all data-bearing nodes to get up to date.
+ assert.writeOK(nodes[0].getDB(dbName).getCollection(collName).insert(
+ {a: 1}, {writeConcern: {w: 3, wtimeout: rst.kDefaultTimeoutMS}}));
+
+ // Stop the secondaries from replicating.
+ secondaries.forEach(function(node) {
+ assert.commandWorked(
+ node.adminCommand({configureFailPoint: 'rsSyncApplyStop', mode: 'alwaysOn'}));
+ });
+ // Stop the primary from being able to complete stepping down.
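+ // Keeping the stepdown blocked means the stale primary is still in PRIMARY state, and still
+ // waiting on the w:majority write, when heartbeats later bring it progress from the other
+ // branch of history, which is exactly the situation in which it must not acknowledge the write.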
+ assert.commandWorked(
+ nodes[0].adminCommand({configureFailPoint: 'blockHeartbeatStepdown', mode: 'alwaysOn'}));
+
+ jsTestLog("Do w:majority write that will block waiting for replication.");
+ var doMajorityWrite = function() {
+ var res = db.getSiblingDB('wMajorityCheck').stepdownAndBackUp.insert({a: 2}, {
+ writeConcern: {w: 'majority'}
+ });
+ assert.writeErrorWithCode(res, ErrorCodes.PrimarySteppedDown);
+ };
+
+ var joinMajorityWriter = startParallelShell(doMajorityWrite, nodes[0].port);
+
+ jsTest.log("Disconnect primary from all secondaries");
+ nodes[0].disconnect(nodes[1]);
+ nodes[0].disconnect(nodes[2]);
+
+ jsTest.log("Wait for a new primary to be elected");
+ // Allow the secondaries to replicate again.
+ secondaries.forEach(function(node) {
+ assert.commandWorked(
+ node.adminCommand({configureFailPoint: 'rsSyncApplyStop', mode: 'off'}));
+ });
+
+ waitForPrimary(nodes[1]);
+
+ jsTest.log("Do a write to the new primary");
+ assert.writeOK(nodes[1].getDB(dbName).getCollection(collName).insert(
+ {a: 3}, {writeConcern: {w: 2, wtimeout: rst.kDefaultTimeoutMS}}));
+
+ jsTest.log("Reconnect the old primary to the rest of the nodes");
+ // Only allow the old primary to connect to the other nodes, not the other way around.
+ // This is so that the old primary will detect that it needs to step down and step itself down,
+ // rather than one of the other nodes detecting this and sending it a replSetStepDown command,
+ // which would cause the old primary to kill all operations and close all connections, making
+ // the way the insert in the parallel shell fails nondeterministic. Rather than handling all
+ // possible failure modes in the parallel shell, allowing heartbeat connectivity in only one
+ // direction makes it easier for the test to fail deterministically.
+ nodes[1].acceptConnectionsFrom(nodes[0]);
+ nodes[2].acceptConnectionsFrom(nodes[0]);
+
+ joinMajorityWriter();
+
+ // Allow the old primary to finish stepping down so that shutdown can finish.
+ var res = null;
+ try {
+ res = nodes[0].adminCommand({configureFailPoint: 'blockHeartbeatStepdown', mode: 'off'});
+ } catch (e) {
+ // Expected: once we disable the fail point the stepdown will proceed, and it's racy whether
+ // the stepdown closes all connections before or after the configureFailPoint command
+ // returns.
+ }
+ if (res) {
+ assert.commandWorked(res);
+ }
+
+ rst.stopSet();
+}());
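
The reconnect step in the test above deliberately restores connectivity in one direction only, as
its comment explains. A minimal isolated sketch of that mongobridge pattern, using only the
ReplSetTest helpers the test itself calls (the two-node set is purely for illustration):

    var rst = new ReplSetTest({nodes: 2, useBridge: true});
    var nodes = rst.startSet();
    rst.initiate();

    nodes[0].disconnect(nodes[1]);             // sever traffic between the two nodes
    nodes[1].acceptConnectionsFrom(nodes[0]);  // re-open only the node0 -> node1 direction
    // node1 still cannot initiate connections to node0; later, restore both directions:
    nodes[0].reconnect(nodes[1]);
    rst.stopSet();
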
diff --git a/jstests/replsets/write_concern_after_stepdown_and_stepup.js b/jstests/replsets/write_concern_after_stepdown_and_stepup.js
new file mode 100644
index 00000000000..ea1c6312eae
--- /dev/null
+++ b/jstests/replsets/write_concern_after_stepdown_and_stepup.js
@@ -0,0 +1,134 @@
+/*
+ * Tests that heartbeats containing writes from a different branch of history can't cause a stale
+ * primary to incorrectly acknowledge a w:majority write that's about to be rolled back, even if the
+ * stale primary is re-elected primary before waiting for the write concern acknowledgement.
+ */
+(function() {
+ 'use strict';
+
+ var name = "writeConcernStepDownAndBackUp";
+ var dbName = "wMajorityCheck";
+ var collName = "stepdownAndBackUp";
+
+ var rst = new ReplSetTest({
+ name: name,
+ nodes: [
+ {},
+ {},
+ {rsConfig: {priority: 0}},
+ ],
+ useBridge: true
+ });
+ var nodes = rst.startSet();
+ rst.initiate();
+
+ function waitForState(node, state) {
+ assert.soonNoExcept(function() {
+ assert.commandWorked(node.adminCommand(
+ {replSetTest: 1, waitForMemberState: state, timeoutMillis: rst.kDefaultTimeoutMS}));
+ return true;
+ });
+ }
+
+ function waitForPrimary(node) {
+ assert.soon(function() {
+ return node.adminCommand('ismaster').ismaster;
+ });
+ }
+
+ function stepUp(node) {
+ var primary = rst.getPrimary();
+ if (primary != node) {
+ assert.throws(function() {
+ primary.adminCommand({replSetStepDown: 60 * 5});
+ });
+ }
+ waitForPrimary(node);
+ }
+
+ jsTestLog("Make sure node 0 is primary.");
+ stepUp(nodes[0]);
+ var primary = rst.getPrimary();
+ var secondaries = rst.getSecondaries();
+ assert.eq(nodes[0], primary);
+ // Wait for all data-bearing nodes to get up to date.
+ assert.writeOK(nodes[0].getDB(dbName).getCollection(collName).insert(
+ {a: 1}, {writeConcern: {w: 3, wtimeout: rst.kDefaultTimeoutMS}}));
+
+ // Stop the secondaries from replicating.
+ secondaries.forEach(function(node) {
+ assert.commandWorked(
+ node.adminCommand({configureFailPoint: 'rsSyncApplyStop', mode: 'alwaysOn'}));
+ });
+ // Stop the primary from calling into awaitReplication().
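+ // With the write parked before it ever reaches the write concern wait, the node can step down
+ // and be re-elected before that wait begins, which is the scenario this test targets.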
+ assert.commandWorked(nodes[0].adminCommand(
+ {configureFailPoint: 'hangBeforeWaitingForWriteConcern', mode: 'alwaysOn'}));
+
+ jsTestLog("Do w:majority write that won't enter awaitReplication() until after the primary " +
+ "has stepped down and back up");
+ var doMajorityWrite = function() {
+ // Run ismaster command with 'hangUpOnStepDown' set to false to mark this connection as
+ // one that shouldn't be closed when the node steps down. This simulates the scenario where
+ // the write was coming from a mongos.
+ assert.commandWorked(db.adminCommand({ismaster: 1, hangUpOnStepDown: false}));
+
+ var res = db.getSiblingDB('wMajorityCheck').stepdownAndBackUp.insert({a: 2}, {
+ writeConcern: {w: 'majority'}
+ });
+ assert.writeErrorWithCode(res, ErrorCodes.PrimarySteppedDown);
+ };
+
+ var joinMajorityWriter = startParallelShell(doMajorityWrite, nodes[0].port);
+
+ jsTest.log("Disconnect primary from all secondaries");
+ nodes[0].disconnect(nodes[1]);
+ nodes[0].disconnect(nodes[2]);
+
+ jsTest.log("Wait for a new primary to be elected");
+ // Allow the secondaries to replicate again.
+ secondaries.forEach(function(node) {
+ assert.commandWorked(
+ node.adminCommand({configureFailPoint: 'rsSyncApplyStop', mode: 'off'}));
+ });
+
+ waitForPrimary(nodes[1]);
+
+ jsTest.log("Do a write to the new primary");
+ assert.writeOK(nodes[1].getDB(dbName).getCollection(collName).insert(
+ {a: 3}, {writeConcern: {w: 2, wtimeout: rst.kDefaultTimeoutMS}}));
+
+ jsTest.log("Reconnect the old primary to the rest of the nodes");
+ nodes[0].reconnect(nodes[1]);
+ nodes[0].reconnect(nodes[2]);
+
+ jsTest.log("Wait for the old primary to step down, roll back its write, and apply the " +
+ "new writes from the new primary");
+ waitForState(nodes[0], ReplSetTest.State.SECONDARY);
+ rst.awaitReplication();
+
+ // At this point all 3 nodes should have the same data
+ assert.soonNoExcept(function() {
+ nodes.forEach(function(node) {
+ assert.eq(null,
+ node.getDB(dbName).getCollection(collName).findOne({a: 2}),
+ "Node " + node.host + " contained op that should have been rolled back");
+ assert.neq(null,
+ node.getDB(dbName).getCollection(collName).findOne({a: 3}),
+ "Node " + node.host +
+ " was missing op from branch of history that should have persisted");
+ });
+ return true;
+ });
+
+ jsTest.log("Make the original primary become primary once again");
+ stepUp(nodes[0]);
+
+ jsTest.log("Unblock the thread waiting for replication of the now rolled-back write, ensure " +
+ "that the write concern failed");
+ assert.commandWorked(nodes[0].adminCommand(
+ {configureFailPoint: 'hangBeforeWaitingForWriteConcern', mode: 'off'}));
+
+ joinMajorityWriter();
+
+ rst.stopSet();
+}());