summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Guo <robert.guo@10gen.com>2017-11-30 13:48:57 -0500
committerRobert Guo <robert.guo@10gen.com>2018-01-10 17:39:11 -0500
commit23a516c7d0acd9b9f571029c9f119069fdbfd73d (patch)
tree1ad05cc9590c8dfbd6f520f13a301f8af451bc7e
parentb89f752fe24dd4b24d59f39ea2f45b01aaca8250 (diff)
downloadmongo-23a516c7d0acd9b9f571029c9f119069fdbfd73d.tar.gz
SERVER-31917 add more logging to the rollback test fixture
-rw-r--r--jstests/replsets/libs/rollback_test.js189
-rw-r--r--jstests/replsets/rollback_all_op_types.js5
-rw-r--r--jstests/replsets/rollback_collmods.js5
-rw-r--r--jstests/replsets/rollback_drop_database.js7
-rw-r--r--jstests/replsets/rollback_drop_index_after_rename.js7
-rw-r--r--jstests/replsets/rollback_during_downgrade.js5
-rw-r--r--jstests/replsets/rollback_during_upgrade.js6
-rw-r--r--jstests/replsets/rollback_rename_collection_on_sync_source.js5
8 files changed, 159 insertions, 70 deletions
diff --git a/jstests/replsets/libs/rollback_test.js b/jstests/replsets/libs/rollback_test.js
index 12edd71dc92..0af1f6cb3a9 100644
--- a/jstests/replsets/libs/rollback_test.js
+++ b/jstests/replsets/libs/rollback_test.js
@@ -2,15 +2,14 @@
* Wrapper around ReplSetTest for testing rollback behavior. It allows the caller to easily
* transition between stages of a rollback without having to configure the replset explicitly.
*
- * This library exposes the following 3 sequential stages of rollback:
+ * This library exposes the following 5 sequential stages of rollback:
* 1. RollbackTest starts in kSteadyStateOps: the replica set is in steady state replication.
* Operations applied will be replicated.
* 2. kRollbackOps: operations applied during this phase will not be replicated and eventually be
* rolled back.
- * 3. kSyncSourceOps: operations applied during this stage will not be replicated initially,
- * causing the data-bearing nodes to diverge. The data will be replicated after other
- * nodes are rolled back.
- * 4. kSteadyStateOps: (back to stage 1) with the option of waiting for the rollback to finish.
+ * 3. kSyncSourceOpsBeforeRollback: apply operations on the sync source before rollback begins.
+ * 4. kSyncSourceOpsDuringRollback: apply operations on the sync source after rollback has begun.
+ * 5. kSteadyStateOps: (same as stage 1) with the option of waiting for the rollback to finish.
*
* Please refer to the various `transition*` functions for more information on the behavior
* of each stage.
@@ -19,16 +18,22 @@ load("jstests/replsets/rslib.js");
function RollbackTest(name = "RollbackTest") {
const State = {
- kStopped: "stopped",
- kRollbackOps: "doing operations that will be rolled back",
- kSyncSourceOps: "doing operations on sync source",
- kSteadyStateOps: "doing operations during steady state replication",
+ kStopped: "kStopped",
+ kRollbackOps: "kRollbackOps",
+ // DEPRECATED: Remove this line after TIG-680.
+ kSyncSourceOps: "kSyncSourceOpsBeforeRollback",
+ kSyncSourceOpsBeforeRollback: "kSyncSourceOpsBeforeRollback",
+ kSyncSourceOpsDuringRollback: "kSyncSourceOpsDuringRollback",
+ kSteadyStateOps: "kSteadyStateOps",
};
const AcceptableTransitions = {
[State.kStopped]: [],
- [State.kRollbackOps]: [State.kSyncSourceOps],
- [State.kSyncSourceOps]: [State.kSteadyStateOps],
+ [State.kRollbackOps]: [State.kSyncSourceOpsBeforeRollback],
+ // DEPRECATED: remove transition to State.kSteadyStateOps after TIG-680.
+ [State.kSyncSourceOpsBeforeRollback]:
+ [State.kSyncSourceOpsDuringRollback, State.kSteadyStateOps],
+ [State.kSyncSourceOpsDuringRollback]: [State.kSteadyStateOps],
[State.kSteadyStateOps]: [State.kStopped, State.kRollbackOps],
};
@@ -37,7 +42,9 @@ function RollbackTest(name = "RollbackTest") {
let curPrimary;
let curSecondary;
let arbiter;
- let curState;
+
+ let curState = State.kSteadyStateOps;
+ let lastRBID;
// Do more complicated instantiation in this init() function. This reduces the amount of code
// run at the class level (i.e. outside of any functions) and improves readability.
@@ -62,18 +69,36 @@ function RollbackTest(name = "RollbackTest") {
// Wait for the primary to be ready.
const actualPrimary = rst.getPrimary();
- assert.eq(actualPrimary, curPrimary, 'The ReplSetTest.node[0] is not the primary');
+ assert.eq(actualPrimary, curPrimary, "ReplSetTest.node[0] is not the primary");
- curState = State.kSteadyStateOps;
+ lastRBID = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid;
})();
+ function checkDataConsistency() {
+ assert.eq(curState,
+ State.kSteadyStateOps,
+ "Not in kSteadyStateOps state, cannot check data consistency");
+ const name = rst.name;
+ rst.checkOplogs(name);
+ rst.checkReplicatedDataHashes(name);
+ // TODO: SERVER-31920 run validate.
+ }
+
+ function log(msg, important = false) {
+ if (important) {
+ jsTestLog(`[${name}] ${msg}`);
+ } else {
+ print(`[${name}] ${msg}`);
+ }
+ }
+
/**
* return whether the cluster can transition from the current State to `newState`.
* @private
*/
function transitionIfAllowed(newState) {
if (AcceptableTransitions[curState].includes(newState)) {
- jsTestLog(`[${name}] Transitioning to: "${newState}"`);
+ log(`Transitioning to: "${newState}"`, true);
curState = newState;
} else {
// Transitioning to a disallowed State is likely a bug in the code, so we throw an
@@ -85,31 +110,45 @@ function RollbackTest(name = "RollbackTest") {
/**
* Transition from a rollback state to a steady state. Operations applied in this phase will
* be replicated to all nodes and should not be rolled back.
- *
- * @param waitForRollback specify whether to wait for rollback to finish before returning.
*/
- this.transitionToSteadyStateOperations = function(
- {waitForRollback: waitForRollback = true} = {}) {
- transitionIfAllowed(State.kSteadyStateOps);
+ this.transitionToSteadyStateOperations = function() {
- const originalRBID = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid;
-
- // Connect the secondary to everything else so it'll go into rollback.
+ // Ensure the secondary is connected. It may already have been connected from a previous
+ // stage.
+ log(`Ensuring the secondary ${curSecondary.host} is connected to the other nodes`);
curSecondary.reconnect([curPrimary, arbiter]);
- if (waitForRollback) {
- jsTestLog(`[${name}] Waiting for rollback to complete`);
- assert.soonNoExcept(() => {
+ log("Waiting for rollback to complete", true);
+ let rbid = -1;
+ assert.soon(() => {
+ try {
+ rbid = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid;
+ } catch (e) {
// Command can fail when sync source is being cleared.
- const rbid = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid;
- return rbid === originalRBID + 1;
- }, 'Timed out waiting for RBID to increment');
+ }
+ // Fail early if the rbid is greater than lastRBID+1.
+ assert.lte(rbid,
+ lastRBID + 1,
+ `RBID is too large. current RBID: ${rbid}, last RBID: ${lastRBID}`);
- rst.awaitSecondaryNodes();
- rst.awaitReplication();
+ return rbid === lastRBID + 1;
- jsTestLog(`[${name}] Rollback completed`);
- }
+ }, "Timed out waiting for RBID to increment");
+
+ rst.awaitSecondaryNodes();
+ rst.awaitReplication();
+
+ log("Rollback and awaitReplication completed", true);
+
+ // We call transition to steady state ops after awaiting replication has finished,
+ // otherwise it could be confusing to see operations being replicated when we're already
+ // in rollback complete state.
+ transitionIfAllowed(State.kSteadyStateOps);
+
+ // After the previous rollback (if any) has completed and await replication has finished,
+ // the replica set should be in a consistent and "fresh" state. We now prepare for the next
+ // rollback.
+ checkDataConsistency();
return curPrimary;
};
@@ -119,15 +158,17 @@ function RollbackTest(name = "RollbackTest") {
* its operations will eventually be rolled back.
*/
this.transitionToRollbackOperations = function() {
- transitionIfAllowed(State.kRollbackOps);
-
// Ensure previous operations are replicated. The current secondary will be used as the sync
// source later on, so it must be up-to-date to prevent any previous operations from being
// rolled back.
+ rst.awaitSecondaryNodes();
rst.awaitReplication();
+ transitionIfAllowed(State.kRollbackOps);
+
// Disconnect the current primary from the secondary so operations on it will eventually be
// rolled back. But do not disconnect it from the arbiter so it can stay as the primary.
+ log(`Isolating the primary ${curPrimary.host} from the secondary ${curSecondary.host}`);
curPrimary.disconnect([curSecondary]);
return curPrimary;
@@ -139,46 +180,90 @@ function RollbackTest(name = "RollbackTest") {
* primary so that that optimes diverge and previous operations on the old primary will be
* rolled back.
*/
- this.transitionToSyncSourceOperations = function() {
- transitionIfAllowed(State.kSyncSourceOps);
+ this.transitionToSyncSourceOperationsBeforeRollback = function() {
+ transitionIfAllowed(State.kSyncSourceOpsBeforeRollback);
+
+ // Insert one document to ensure rollback will not be skipped.
+ let dbName = "EnsureThereIsAtLeastOneOperationToRollback";
+ assert.writeOK(curPrimary.getDB(dbName).ensureRollback.insert(
+ {thisDocument: 'is inserted to ensure rollback is not skipped'}));
- // Isolate the current primary from everything so it steps down.
+ log(`Isolating the primary ${curPrimary.host} so it will step down`);
curPrimary.disconnect([curSecondary, arbiter]);
- // Wait for the primary to step down.
+ log(`Waiting for the primary ${curPrimary.host} to step down`);
+ try {
+ // The stepdown freeze period is short because the node is disconnected from
+ // the rest of the replica set, so it physically can't become the primary.
+ curPrimary.adminCommand({replSetStepDown: 1, force: true});
+ } catch (e) {
+ // Stepdown may fail if the node has already started stepping down.
+ print('Caught exception from replSetStepDown: ' + e);
+ }
+
waitForState(curPrimary, ReplSetTest.State.SECONDARY);
- // Reconnect the secondary to the arbiter so it can be elected.
+ log(`Reconnecting the secondary ${curSecondary.host} to the arbiter so it can be elected`);
curSecondary.reconnect([arbiter]);
- // Wait for the old secondary to become the new primary, which will be used as the sync
- // source later on when the old primary rolls back.
+ log(`Waiting for the new primary ${curSecondary.host} to be elected`);
+ if (rst.getReplSetConfig().protocolVersion > 0) {
+ assert.soonNoExcept(() => {
+ const res = curSecondary.adminCommand({replSetStepUp: 1});
+ return res.ok;
+ });
+ }
+
const newPrimary = rst.getPrimary();
// As a sanity check, ensure the new primary is the old secondary. The opposite scenario
// should never be possible with 2 electable nodes and the sequence of operations thus far.
assert.eq(newPrimary, curSecondary, "Did not elect a new node as primary");
-
- // Add a sleep and a dummy write to ensure the new primary has an optime greater than
- // the last optime on the node that will undergo rollback. This greater optime ensures that
- // the new primary is eligible to become a sync source in pv0.
- sleep(1000);
- var dbName = "ensureEligiblePV0";
- assert.writeOK(newPrimary.getDB(dbName).testColl.insert({id: 0}));
+ log(`Elected the old secondary ${newPrimary.host} as the new primary`);
+
+ if (rst.getReplSetConfig().protocolVersion === 0) {
+ // Add a sleep and a dummy write to ensure the new primary has an optime greater than
+ // the last optime on the node that will undergo rollback. This greater optime ensures
+ // that the new primary is eligible to become a sync source in pv0.
+ sleep(1000);
+ dbName = "ensureEligiblePV0";
+ assert.writeOK(newPrimary.getDB(dbName).testColl.insert({id: 0}));
+ }
// The old primary is the new secondary; the old secondary just got elected as the new
// primary, so we update the topology to reflect this change.
curSecondary = curPrimary;
curPrimary = newPrimary;
+ lastRBID = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid;
+
+ return curPrimary;
+ };
+
+ // DEPRECATED: remove this line after TIG-680.
+ this.transitionToSyncSourceOperations = this.transitionToSyncSourceOperationsBeforeRollback;
+
+ /**
+ * Transition to the third stage of rollback testing, where we reconnect the rollback node so
+ * it will start rolling back.
+ *
+ * Note that there is no guarantee that operations performed on the sync source while in this
+ * state will actually occur *during* the rollback process. They may happen before the rollback
+ * is finished or after the rollback is done. We provide this state, though, as an attempt to
+ * provide a way to test this behavior, even if it's non-deterministic.
+ */
+ this.transitionToSyncSourceOperationsDuringRollback = function() {
+ transitionIfAllowed(State.kSyncSourceOpsDuringRollback);
+
+ log(`Reconnecting the secondary ${curSecondary.host} so it'll go into rollback`);
+ curSecondary.reconnect([curPrimary, arbiter]);
+
return curPrimary;
};
this.stop = function() {
+ checkDataConsistency();
transitionIfAllowed(State.kStopped);
- const name = rst.name;
- rst.checkOplogs(name);
- rst.checkReplicatedDataHashes(name);
return rst.stopSet();
};
diff --git a/jstests/replsets/rollback_all_op_types.js b/jstests/replsets/rollback_all_op_types.js
index cb1020dc4ad..828b3276786 100644
--- a/jstests/replsets/rollback_all_op_types.js
+++ b/jstests/replsets/rollback_all_op_types.js
@@ -354,8 +354,9 @@
RollbackOps(rollbackNode);
// Wait for rollback to finish.
- rollbackTest.transitionToSyncSourceOperations();
- rollbackTest.transitionToSteadyStateOperations({waitForRollback: true});
+ rollbackTest.transitionToSyncSourceOperationsBeforeRollback();
+ rollbackTest.transitionToSyncSourceOperationsDuringRollback();
+ rollbackTest.transitionToSteadyStateOperations();
// Check the replica set.
rollbackTest.stop();
diff --git a/jstests/replsets/rollback_collmods.js b/jstests/replsets/rollback_collmods.js
index e6db187d6b4..faa2c09194c 100644
--- a/jstests/replsets/rollback_collmods.js
+++ b/jstests/replsets/rollback_collmods.js
@@ -92,12 +92,13 @@
printCollectionOptions(rollbackTest, "before branch");
RollbackOps(rollbackNode);
- rollbackTest.transitionToSyncSourceOperations();
+ rollbackTest.transitionToSyncSourceOperationsBeforeRollback();
printCollectionOptions(rollbackTest, "before rollback");
// No ops on the sync source.
// Wait for rollback to finish.
- rollbackTest.transitionToSteadyStateOperations({waitForRollback: true});
+ rollbackTest.transitionToSyncSourceOperationsDuringRollback();
+ rollbackTest.transitionToSteadyStateOperations();
printCollectionOptions(rollbackTest, "after rollback");
SteadyStateOps(rollbackTest.getPrimary());
diff --git a/jstests/replsets/rollback_drop_database.js b/jstests/replsets/rollback_drop_database.js
index 7d52c8a67ef..b48774efdd3 100644
--- a/jstests/replsets/rollback_drop_database.js
+++ b/jstests/replsets/rollback_drop_database.js
@@ -56,13 +56,14 @@
jsTestLog("Database " + oldDbName + " successfully dropped on primary node " +
rollbackNode.host);
- rollbackTest.transitionToSyncSourceOperations();
+ rollbackTest.transitionToSyncSourceOperationsBeforeRollback();
// Perform an insert on another database while interfacing with the new primary.
// This is the sync source's divergent oplog entry.
assert.writeOK(syncSourceNode.getDB(newDbName)["afterRollback"].insert({"num": 2}));
- rollbackTest.transitionToSteadyStateOperations({waitForRollback: true});
+ rollbackTest.transitionToSyncSourceOperationsDuringRollback();
+ rollbackTest.transitionToSteadyStateOperations();
rollbackTest.stop();
-})(); \ No newline at end of file
+})();
diff --git a/jstests/replsets/rollback_drop_index_after_rename.js b/jstests/replsets/rollback_drop_index_after_rename.js
index e3b965c1c9b..efeffcdbf68 100644
--- a/jstests/replsets/rollback_drop_index_after_rename.js
+++ b/jstests/replsets/rollback_drop_index_after_rename.js
@@ -49,11 +49,10 @@
let rollbackNode = rollbackTest.transitionToRollbackOperations();
RollbackOps(rollbackNode);
- // No sync source ops.
- rollbackTest.transitionToSyncSourceOperations();
-
// Wait for rollback to finish.
- rollbackTest.transitionToSteadyStateOperations({waitForRollback: true});
+ rollbackTest.transitionToSyncSourceOperationsBeforeRollback();
+ rollbackTest.transitionToSyncSourceOperationsDuringRollback();
+ rollbackTest.transitionToSteadyStateOperations();
rollbackTest.stop();
})();
diff --git a/jstests/replsets/rollback_during_downgrade.js b/jstests/replsets/rollback_during_downgrade.js
index 77733a1f1b9..ccacb2b73f0 100644
--- a/jstests/replsets/rollback_during_downgrade.js
+++ b/jstests/replsets/rollback_during_downgrade.js
@@ -71,7 +71,7 @@
assert.commandWorked(rollbackNode.getDB(dbName)['tToDropIndexes'].dropIndexes());
// ----------------- Begins running operations only on the sync source node ---------------
- rollbackTest.transitionToSyncSourceOperations();
+ rollbackTest.transitionToSyncSourceOperationsBeforeRollback();
// Fake removing the UUID on the sync source, to test when neither node has a UUID for
// completeness.
@@ -82,7 +82,8 @@
}));
// ----------------- Allows rollback to occur and checks for consistency ------------------
- rollbackTest.transitionToSteadyStateOperations({waitForRollback: true});
+ rollbackTest.transitionToSyncSourceOperationsDuringRollback();
+ rollbackTest.transitionToSteadyStateOperations();
waitForDowngradeToFinish();
diff --git a/jstests/replsets/rollback_during_upgrade.js b/jstests/replsets/rollback_during_upgrade.js
index 66e7c8d2bdb..739908a6c73 100644
--- a/jstests/replsets/rollback_during_upgrade.js
+++ b/jstests/replsets/rollback_during_upgrade.js
@@ -88,7 +88,7 @@
}));
// ----------------- Begins running operations only on the sync source node ---------------
- rollbackTest.transitionToSyncSourceOperations();
+ rollbackTest.transitionToSyncSourceOperationsBeforeRollback();
// Fake giving the collections UUIDs to simulate an upgrade on the sync source.
// The rollback test fixture only has two data bearing nodes so we cannot run an upgrade using
@@ -103,8 +103,8 @@
}));
// ----------------- Allows rollback to occur and checks for consistency ------------------
- rollbackTest.transitionToSteadyStateOperations({waitForRollback: true});
-
+ rollbackTest.transitionToSyncSourceOperationsDuringRollback();
+ rollbackTest.transitionToSteadyStateOperations();
waitForUpgradeToFinish();
rollbackTest.stop();
diff --git a/jstests/replsets/rollback_rename_collection_on_sync_source.js b/jstests/replsets/rollback_rename_collection_on_sync_source.js
index dfb24b68050..0f781eb6b8d 100644
--- a/jstests/replsets/rollback_rename_collection_on_sync_source.js
+++ b/jstests/replsets/rollback_rename_collection_on_sync_source.js
@@ -39,11 +39,12 @@
let rollbackNode = rollbackTest.transitionToRollbackOperations();
RollbackOps(rollbackNode);
- let syncSourceNode = rollbackTest.transitionToSyncSourceOperations();
+ let syncSourceNode = rollbackTest.transitionToSyncSourceOperationsBeforeRollback();
SyncSourceOps(syncSourceNode);
// Wait for rollback to finish.
- rollbackTest.transitionToSteadyStateOperations({waitForRollback: true});
+ rollbackTest.transitionToSyncSourceOperationsDuringRollback();
+ rollbackTest.transitionToSteadyStateOperations();
// Check the replica set.
rollbackTest.stop();