summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSpencer T Brody <spencer@mongodb.com>2016-11-03 13:55:18 -0400
committerSpencer T Brody <spencer@mongodb.com>2016-11-07 11:19:45 -0500
commitf5c613f11f170cadd4b698d108b3c7b636913568 (patch)
tree64979b2ec39420c450bc833f9750082808c740c9
parent52d16440aeb88962e579fc56e314c388f7c70ded (diff)
downloadmongo-f5c613f11f170cadd4b698d108b3c7b636913568.tar.gz
SERVER-25145 Add jstest to ensure that we can no longer trigger double-rollback.
-rw-r--r--jstests/replsets/double_rollback.js157
-rw-r--r--src/mongo/db/repl/rs_rollback.cpp14
-rw-r--r--src/mongo/shell/replsettest.js20
3 files changed, 191 insertions, 0 deletions
diff --git a/jstests/replsets/double_rollback.js b/jstests/replsets/double_rollback.js
new file mode 100644
index 00000000000..d596fc6d93a
--- /dev/null
+++ b/jstests/replsets/double_rollback.js
@@ -0,0 +1,157 @@
+/*
+ * This test causes node 2 to enter rollback, reach the common point, and exit rollback, but before
+ * it can apply operations to bring it back to a consistent state, switch sync sources to the node
+ * that originally gave it the ops it is now rolling back (node 0). This test then verifies that
+ * node 2 refuses to use node0 as a sync source because it doesn't contain the minValid document
+ * it needs to reach consistency. Node 2 is then allowed to reconnect to the node it was
+ * originally rolling back against (node 1) and finish its rollback. This is a regression test
+ * against the case where we *did* allow node 2 to sync from node 0 which gave it the very ops
+ * it rolled back, which could then lead to a double-rollback when node 2 was reconnected
+ * to node 1 and tried to apply its oplog despite not being in a consistent state.
+ */
+(function() {
+ 'use strict';
+
+ var name = "double_rollback";
+ var dbName = "test";
+ var collName = "double_rollback";
+
+ var rst = new ReplSetTest({
+ name: name,
+ nodes: [
+ {},
+ {},
+ {rsConfig: {priority: 0}},
+ {rsConfig: {arbiterOnly: true}},
+ {rsConfig: {arbiterOnly: true}}
+ ],
+ useBridge: true
+ });
+ var nodes = rst.startSet();
+ rst.initiate();
+
+ function waitForState(node, state) {
+ assert.soonNoExcept(function() {
+ assert.commandWorked(node.adminCommand(
+ {replSetTest: 1, waitForMemberState: state, timeoutMillis: rst.kDefaultTimeoutMs}));
+ return true;
+ });
+ };
+
+ function stepUp(rst, node) {
+ var primary = rst.getPrimary();
+ if (primary != node) {
+ assert.commandWorked(primary.adminCommand({replSetStepDown: 1}));
+ }
+ waitForState(node, ReplSetTest.State.PRIMARY);
+ }
+
+ jsTestLog("Make sure node 0 is primary.");
+ stepUp(rst, nodes[0]);
+ assert.eq(nodes[0], rst.getPrimary());
+ // Wait for all data bearing nodes to get up to date.
+ assert.writeOK(nodes[0].getDB(dbName).getCollection(collName).insert(
+ {a: 1}, {writeConcern: {w: 3, wtimeout: 5 * 60 * 1000}}));
+
+ jsTestLog("Create two partitions: [1] and [0,2,3,4].");
+ nodes[1].disconnect(nodes[0]);
+ nodes[1].disconnect(nodes[2]);
+ nodes[1].disconnect(nodes[3]);
+ nodes[1].disconnect(nodes[4]);
+
+ jsTestLog("Do a write that is replicated to [0,2,3,4].");
+ assert.writeOK(nodes[0].getDB(dbName).getCollection(collName + "2").insert({a: 2}, {
+ writeConcern: {w: 2, wtimeout: 5 * 60 * 1000}
+ }));
+
+ jsTestLog("Repartition to: [0,2] and [1,3,4].");
+ nodes[1].reconnect(nodes[3]);
+ nodes[1].reconnect(nodes[4]);
+ nodes[3].disconnect(nodes[0]);
+ nodes[3].disconnect(nodes[2]);
+ nodes[4].disconnect(nodes[0]);
+ nodes[4].disconnect(nodes[2]);
+
+ jsTestLog("Ensure that 0 steps down and that 1 becomes primary.");
+ waitForState(nodes[0], ReplSetTest.State.SECONDARY);
+ waitForState(nodes[1], ReplSetTest.State.PRIMARY);
+ assert.eq(nodes[1], rst.getPrimary());
+
+ jsTestLog("Do a write to node 1 on the [1,3,4] side of the partition.");
+ assert.writeOK(nodes[1].getDB(dbName).getCollection(collName + "3").insert({a: 3}));
+
+ // Turn on failpoint on node 2 to pause rollback after oplog is truncated and minValid is set.
+ assert.commandWorked(nodes[2].getDB('admin').runCommand(
+ {configureFailPoint: 'rollbackHangBeforeFinish', mode: 'alwaysOn'}));
+
+ jsTestLog("Repartition to: [0] and [1,2,3,4].");
+ nodes[2].disconnect(nodes[0]);
+ nodes[2].reconnect(nodes[1]);
+ nodes[2].reconnect(nodes[3]);
+ nodes[2].reconnect(nodes[4]);
+
+ jsTestLog("Wait for node 2 to go into ROLLBACK and start syncing from node 1.");
+ // Since nodes 1 and 2 have now diverged, node 2 should go into rollback.
+ waitForState(nodes[2], ReplSetTest.State.ROLLBACK);
+ rst.awaitSyncSource(nodes[2], nodes[1]);
+
+ jsTestLog("Wait for failpoint on node 2 to pause rollback before it finishes");
+ // Wait for fail point message to be logged.
+ var checkLog = function(node, msg) {
+ assert.soon(function() {
+ var logMessages = assert.commandWorked(node.adminCommand({getLog: 'global'})).log;
+ for (var i = 0; i < logMessages.length; i++) {
+ if (logMessages[i].indexOf(msg) != -1) {
+ return true;
+ }
+ }
+ return false;
+ }, 'Did not see a log entry containing the following message: ' + msg);
+ };
+ checkLog(nodes[2], 'rollback - rollbackHangBeforeFinish fail point enabled');
+
+ jsTestLog("Repartition to: [1,3,4] and [0,2].");
+ nodes[2].disconnect(nodes[1]);
+ nodes[2].reconnect(nodes[0]);
+
+ // Turn off failpoint on node 2 to allow rollback to finish.
+ assert.commandWorked(nodes[2].getDB('admin').runCommand(
+ {configureFailPoint: 'rollbackHangBeforeFinish', mode: 'off'}));
+
+ jsTestLog("Wait for node 2 exit ROLLBACK state and go into RECOVERING");
+ waitForState(nodes[2], ReplSetTest.State.RECOVERING);
+
+ // At this point node 2 has truncated its oplog back to the common point and is looking
+ // for a sync source it can use to reach minvalid and get back into SECONDARY state. Node 0
+ // is the only node it can reach, but since node 0 doesn't contain node 2's minvalid oplog entry
+ // node 2 will refuse to use it as a sync source.
+ checkLog(nodes[2],
+ "remote oplog does not contain entry with optime matching our required optime");
+
+ var node0RBID = nodes[0].adminCommand('replSetGetRBID').rbid;
+ var node1RBID = nodes[1].adminCommand('replSetGetRBID').rbid;
+
+ jsTestLog("Reconnect all nodes.");
+ nodes[0].reconnect(nodes[1]);
+ nodes[0].reconnect(nodes[3]);
+ nodes[0].reconnect(nodes[4]);
+ nodes[2].reconnect(nodes[1]);
+ nodes[2].reconnect(nodes[3]);
+ nodes[2].reconnect(nodes[4]);
+
+ jsTestLog("Wait for nodes 0 to roll back and both node 0 and 2 to catch up to node 1");
+ waitForState(nodes[0], ReplSetTest.State.SECONDARY);
+ waitForState(nodes[2], ReplSetTest.State.SECONDARY);
+ rst.awaitReplication();
+
+ // Check that rollback happened on node 0, but not on node 2 since it had already rolled back
+ // and just needed to finish applying ops to reach minValid.
+ assert.neq(node0RBID, nodes[0].adminCommand('replSetGetRBID').rbid);
+ assert.eq(node1RBID, nodes[1].adminCommand('replSetGetRBID').rbid);
+
+ // Node 1 should still be primary, and should now be able to satisfy majority writes again.
+ assert.writeOK(nodes[1].getDB(dbName).getCollection(collName + "4").insert({a: 4}, {
+ writeConcern: {w: 3, wtimeout: 5 * 60 * 1000}
+ }));
+
+}()); \ No newline at end of file
diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp
index b1fd726d062..352e4625cb3 100644
--- a/src/mongo/db/repl/rs_rollback.cpp
+++ b/src/mongo/db/repl/rs_rollback.cpp
@@ -63,6 +63,7 @@
#include "mongo/db/repl/rollback_source.h"
#include "mongo/db/repl/rslog.h"
#include "mongo/db/repl/storage_interface.h"
+#include "mongo/util/fail_point_service.h"
#include "mongo/util/log.h"
/* Scenarios
@@ -116,6 +117,10 @@ using std::string;
using std::pair;
namespace repl {
+
+// Failpoint which causes rollback to hang before finishing.
+MONGO_FP_DECLARE(rollbackHangBeforeFinish);
+
namespace {
class RSFatalException : public std::exception {
@@ -895,6 +900,15 @@ Status _syncRollback(OperationContext* txn,
}
replCoord->incrementRollbackID();
+ if (MONGO_FAIL_POINT(rollbackHangBeforeFinish)) {
+ // This log output is used in js tests so please leave it.
+ log() << "rollback - rollbackHangBeforeFinish fail point "
+ "enabled. Blocking until fail point is disabled.";
+ while (MONGO_FAIL_POINT(rollbackHangBeforeFinish)) {
+ mongo::sleepsecs(1);
+ }
+ }
+
// Success; leave "ROLLBACK" state intact until applier thread has reloaded the new minValid.
// Otherwise, the applier could transition the node to SECONDARY with an out-of-date minValid.
return Status::OK();
diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js
index 0acb2a4d924..43d4c5e835a 100644
--- a/src/mongo/shell/replsettest.js
+++ b/src/mongo/shell/replsettest.js
@@ -496,6 +496,26 @@ var ReplSetTest = function(opts) {
};
/**
+ * Blocks until the specified node says it's syncing from the given upstream node.
+ */
+ this.awaitSyncSource = function(node, upstreamNode, timeout) {
+ print("Waiting for node " + node.name + " to start syncing from " + upstreamNode.name);
+ var status = null;
+ assert.soonNoExcept(
+ function() {
+ status = node.getDB("admin").runCommand({replSetGetStatus: 1});
+ for (var j = 0; j < status.members.length; j++) {
+ if (status.members[j].self) {
+ return status.members[j].syncingTo === upstreamNode.host;
+ }
+ }
+ return false;
+ },
+ "Awaiting node " + node + " syncing from " + upstreamNode + ": " + tojson(status),
+ timeout);
+ };
+
+ /**
* Blocks until all nodes agree on who the primary is.
*/
this.awaitNodesAgreeOnPrimary = function(timeout) {