summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSpencer T Brody <spencer@mongodb.com>2016-11-03 13:55:18 -0400
committerSpencer T Brody <spencer@mongodb.com>2016-11-06 17:24:19 -0500
commitf3a4d0914fca1ad2fc9a659f4ece42f9dcf0b2c2 (patch)
tree00cdbb29087acf43a6466b5d0bfcdc0515b4dc89
parentc3e843b10cd3522fd5f5db71486cfc738c88428d (diff)
downloadmongo-f3a4d0914fca1ad2fc9a659f4ece42f9dcf0b2c2.tar.gz
SERVER-25145 Add jstest to ensure that we can no longer trigger double-rollback.
(cherry picked from commit 3716f0ab00f8f2f4e5e85a87dc06c125628fe03e)
-rw-r--r--jstests/replsets/double_rollback.js161
-rw-r--r--src/mongo/db/repl/rs_rollback.cpp14
-rw-r--r--src/mongo/shell/replsettest.js20
3 files changed, 195 insertions, 0 deletions
diff --git a/jstests/replsets/double_rollback.js b/jstests/replsets/double_rollback.js
new file mode 100644
index 00000000000..7f5ec3c4fb7
--- /dev/null
+++ b/jstests/replsets/double_rollback.js
@@ -0,0 +1,161 @@
+/*
+ * This test causes node 2 to enter rollback, reach the common point, and exit rollback, but before
+ * it can apply operations to bring it back to a consistent state, switch sync sources to the node
+ * that originally gave it the ops it is now rolling back (node 0). This test then verifies that
+ * node 2 refuses to use node0 as a sync source because it doesn't contain the minValid document
+ * it needs to reach consistency. Node 2 is then allowed to reconnect to the node it was
+ * originally rolling back against (node 1) and finish its rollback. This is a regression test
+ * against the case where we *did* allow node 2 to sync from node 0 which gave it the very ops
+ * it rolled back, which could then lead to a double-rollback when node 2 was reconnected
+ * to node 1 and tried to apply its oplog despite not being in a consistent state.
+ */
+(function() {
+ 'use strict';
+
+ var name = "double_rollback";
+ var dbName = "test";
+ var collName = "double_rollback";
+
+ var rst = new ReplSetTest({
+ name: name,
+ nodes: [
+ {},
+ {},
+ {rsConfig: {priority: 0}},
+ {rsConfig: {arbiterOnly: true}},
+ {rsConfig: {arbiterOnly: true}}
+ ],
+ useBridge: true
+ });
+ var nodes = rst.startSet();
+ rst.initiate();
+
+ var timeout = 5 * 60 * 1000;
+
+ function waitForState(node, state) {
+ assert.soonNoExcept(function() {
+ assert.commandWorked(node.adminCommand(
+ {replSetTest: 1, waitForMemberState: state, timeoutMillis: timeout}));
+ return true;
+ });
+ };
+
+ function stepUp(rst, node) {
+ var primary = rst.getPrimary();
+ if (primary != node) {
+ assert.commandWorked(primary.adminCommand({replSetStepDown: 1}));
+ }
+ waitForState(node, ReplSetTest.State.PRIMARY);
+ }
+
+ jsTestLog("Make sure node 0 is primary.");
+ stepUp(rst, nodes[0]);
+ assert.eq(nodes[0], rst.getPrimary());
+ // Wait for all data bearing nodes to get up to date.
+ assert.writeOK(nodes[0].getDB(dbName).getCollection(collName).insert(
+ {a: 1}, {writeConcern: {w: 3, wtimeout: timeout}}));
+
+ jsTestLog("Create two partitions: [1] and [0,2,3,4].");
+ nodes[1].disconnect(nodes[0]);
+ nodes[1].disconnect(nodes[2]);
+ nodes[1].disconnect(nodes[3]);
+ nodes[1].disconnect(nodes[4]);
+
+ jsTestLog("Do a write that is replicated to [0,2,3,4].");
+ assert.writeOK(nodes[0]
+ .getDB(dbName)
+ .getCollection(collName + "2")
+ .insert({a: 2}, {writeConcern: {w: 2, wtimeout: timeout}}));
+
+ jsTestLog("Repartition to: [0,2] and [1,3,4].");
+ nodes[1].reconnect(nodes[3]);
+ nodes[1].reconnect(nodes[4]);
+ nodes[3].disconnect(nodes[0]);
+ nodes[3].disconnect(nodes[2]);
+ nodes[4].disconnect(nodes[0]);
+ nodes[4].disconnect(nodes[2]);
+
+ jsTestLog("Ensure that 0 steps down and that 1 becomes primary.");
+ waitForState(nodes[0], ReplSetTest.State.SECONDARY);
+ waitForState(nodes[1], ReplSetTest.State.PRIMARY);
+ assert.eq(nodes[1], rst.getPrimary());
+
+ jsTestLog("Do a write to node 1 on the [1,3,4] side of the partition.");
+ assert.writeOK(nodes[1].getDB(dbName).getCollection(collName + "3").insert({a: 3}));
+
+ // Turn on failpoint on node 2 to pause rollback after oplog is truncated and minValid is set.
+ assert.commandWorked(nodes[2].getDB('admin').runCommand(
+ {configureFailPoint: 'rollbackHangBeforeFinish', mode: 'alwaysOn'}));
+
+ jsTestLog("Repartition to: [0] and [1,2,3,4].");
+ nodes[2].disconnect(nodes[0]);
+ nodes[2].reconnect(nodes[1]);
+ nodes[2].reconnect(nodes[3]);
+ nodes[2].reconnect(nodes[4]);
+
+ jsTestLog("Wait for node 2 to go into ROLLBACK and start syncing from node 1.");
+ // Since nodes 1 and 2 have now diverged, node 2 should go into rollback.
+ waitForState(nodes[2], ReplSetTest.State.ROLLBACK);
+ rst.awaitSyncSource(nodes[2], nodes[1], timeout);
+
+ jsTestLog("Wait for failpoint on node 2 to pause rollback before it finishes");
+ // Wait for fail point message to be logged.
+ var checkLog = function(node, msg) {
+ assert.soon(function() {
+ var logMessages = assert.commandWorked(node.adminCommand({getLog: 'global'})).log;
+ for (var i = 0; i < logMessages.length; i++) {
+ if (logMessages[i].indexOf(msg) != -1) {
+ return true;
+ }
+ }
+ return false;
+ }, 'Did not see a log entry containing the following message: ' + msg, timeout);
+ };
+ checkLog(nodes[2], 'rollback - rollbackHangBeforeFinish fail point enabled');
+
+ jsTestLog("Repartition to: [1,3,4] and [0,2].");
+ nodes[2].disconnect(nodes[1]);
+ nodes[2].reconnect(nodes[0]);
+
+ // Turn off failpoint on node 2 to allow rollback to finish.
+ assert.commandWorked(nodes[2].getDB('admin').runCommand(
+ {configureFailPoint: 'rollbackHangBeforeFinish', mode: 'off'}));
+
+ jsTestLog("Wait for node 2 exit ROLLBACK state and go into RECOVERING");
+ waitForState(nodes[2], ReplSetTest.State.RECOVERING);
+
+ // At this point node 2 has truncated its oplog back to the common point and is looking
+ // for a sync source it can use to reach minvalid and get back into SECONDARY state. Node 0
+ // is the only node it can reach, but since node 0 doesn't contain node 2's minvalid oplog entry
+ // node 2 will refuse to use it as a sync source.
+ checkLog(nodes[2],
+ "remote oplog does not contain entry with optime matching our required optime");
+
+ var node0RBID = nodes[0].adminCommand('replSetGetRBID').rbid;
+ var node1RBID = nodes[1].adminCommand('replSetGetRBID').rbid;
+
+ jsTestLog("Reconnect all nodes.");
+ nodes[0].reconnect(nodes[1]);
+ nodes[0].reconnect(nodes[3]);
+ nodes[0].reconnect(nodes[4]);
+ nodes[2].reconnect(nodes[1]);
+ nodes[2].reconnect(nodes[3]);
+ nodes[2].reconnect(nodes[4]);
+
+ jsTestLog("Wait for nodes 0 to roll back and both node 0 and 2 to catch up to node 1");
+ waitForState(nodes[0], ReplSetTest.State.SECONDARY);
+ waitForState(nodes[2], ReplSetTest.State.SECONDARY);
+ rst.awaitReplication(timeout);
+
+ // Check that rollback happened on node 0, but not on node 2 since it had already rolled back
+ // and just needed to finish applying ops to reach minValid.
+ assert.neq(node0RBID, nodes[0].adminCommand('replSetGetRBID').rbid);
+ assert.eq(node1RBID, nodes[1].adminCommand('replSetGetRBID').rbid);
+
+ // Node 1 should still be primary, and should now be able to satisfy majority writes again.
+ assert.writeOK(nodes[1]
+ .getDB(dbName)
+ .getCollection(collName + "4")
+ .insert({a: 4}, {writeConcern: {w: 3, wtimeout: timeout}}));
+
+}()); \ No newline at end of file
diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp
index 7b40e510127..c05c5a7d69c 100644
--- a/src/mongo/db/repl/rs_rollback.cpp
+++ b/src/mongo/db/repl/rs_rollback.cpp
@@ -62,6 +62,7 @@
#include "mongo/db/repl/roll_back_local_operations.h"
#include "mongo/db/repl/rollback_source.h"
#include "mongo/db/repl/rslog.h"
+#include "mongo/util/fail_point_service.h"
#include "mongo/util/log.h"
/* Scenarios
@@ -115,6 +116,10 @@ using std::string;
using std::pair;
namespace repl {
+
+// Failpoint which causes rollback to hang before finishing.
+MONGO_FP_DECLARE(rollbackHangBeforeFinish);
+
namespace {
class RSFatalException : public std::exception {
@@ -886,6 +891,15 @@ Status _syncRollback(OperationContext* txn,
}
replCoord->incrementRollbackID();
+ if (MONGO_FAIL_POINT(rollbackHangBeforeFinish)) {
+ // This log output is used in js tests so please leave it.
+ log() << "rollback - rollbackHangBeforeFinish fail point "
+ "enabled. Blocking until fail point is disabled.";
+ while (MONGO_FAIL_POINT(rollbackHangBeforeFinish)) {
+ mongo::sleepsecs(1);
+ }
+ }
+
// Success; leave "ROLLBACK" state intact until applier thread has reloaded the new minValid.
// Otherwise, the applier could transition the node to SECONDARY with an out-of-date minValid.
return Status::OK();
diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js
index b9776fd0de1..fb0807909fb 100644
--- a/src/mongo/shell/replsettest.js
+++ b/src/mongo/shell/replsettest.js
@@ -422,6 +422,26 @@ var ReplSetTest = function(opts) {
};
/**
+ * Blocks until the specified node says it's syncing from the given upstream node.
+ */
+ this.awaitSyncSource = function(node, upstreamNode, timeout) {
+ print("Waiting for node " + node.name + " to start syncing from " + upstreamNode.name);
+ var status = null;
+ assert.soonNoExcept(
+ function() {
+ status = node.getDB("admin").runCommand({replSetGetStatus: 1});
+ for (var j = 0; j < status.members.length; j++) {
+ if (status.members[j].self) {
+ return status.members[j].syncingTo === upstreamNode.host;
+ }
+ }
+ return false;
+ },
+ "Awaiting node " + node + " syncing from " + upstreamNode + ": " + tojson(status),
+ timeout);
+ };
+
+ /**
* Blocks until all nodes agree on who the primary is.
*/
this.awaitNodesAgreeOnPrimary = function(timeout) {