diff options
author | Robert Guo <robert.guo@10gen.com> | 2017-11-30 13:48:57 -0500 |
---|---|---|
committer | Robert Guo <robert.guo@10gen.com> | 2018-01-10 17:39:11 -0500 |
commit | 23a516c7d0acd9b9f571029c9f119069fdbfd73d (patch) | |
tree | 1ad05cc9590c8dfbd6f520f13a301f8af451bc7e | |
parent | b89f752fe24dd4b24d59f39ea2f45b01aaca8250 (diff) | |
download | mongo-23a516c7d0acd9b9f571029c9f119069fdbfd73d.tar.gz |
SERVER-31917 add more logging to the rollback test fixture
-rw-r--r-- | jstests/replsets/libs/rollback_test.js | 189 | ||||
-rw-r--r-- | jstests/replsets/rollback_all_op_types.js | 5 | ||||
-rw-r--r-- | jstests/replsets/rollback_collmods.js | 5 | ||||
-rw-r--r-- | jstests/replsets/rollback_drop_database.js | 7 | ||||
-rw-r--r-- | jstests/replsets/rollback_drop_index_after_rename.js | 7 | ||||
-rw-r--r-- | jstests/replsets/rollback_during_downgrade.js | 5 | ||||
-rw-r--r-- | jstests/replsets/rollback_during_upgrade.js | 6 | ||||
-rw-r--r-- | jstests/replsets/rollback_rename_collection_on_sync_source.js | 5 |
8 files changed, 159 insertions, 70 deletions
diff --git a/jstests/replsets/libs/rollback_test.js b/jstests/replsets/libs/rollback_test.js index 12edd71dc92..0af1f6cb3a9 100644 --- a/jstests/replsets/libs/rollback_test.js +++ b/jstests/replsets/libs/rollback_test.js @@ -2,15 +2,14 @@ * Wrapper around ReplSetTest for testing rollback behavior. It allows the caller to easily * transition between stages of a rollback without having to configure the replset explicitly. * - * This library exposes the following 3 sequential stages of rollback: + * This library exposes the following 5 sequential stages of rollback: * 1. RollbackTest starts in kSteadyStateOps: the replica set is in steady state replication. * Operations applied will be replicated. * 2. kRollbackOps: operations applied during this phase will not be replicated and eventually be * rolled back. - * 3. kSyncSourceOps: operations applied during this stage will not be replicated initially, - * causing the data-bearing nodes to diverge. The data will be replicated after other - * nodes are rolled back. - * 4. kSteadyStateOps: (back to stage 1) with the option of waiting for the rollback to finish. + * 3. kSyncSourceOpsBeforeRollback: apply operations on the sync source before rollback begins. + * 4. kSyncSourceOpsDuringRollback: apply operations on the sync source after rollback has begun. + * 5. kSteadyStateOps: (same as stage 1) with the option of waiting for the rollback to finish. * * Please refer to the various `transition*` functions for more information on the behavior * of each stage. @@ -19,16 +18,22 @@ load("jstests/replsets/rslib.js"); function RollbackTest(name = "RollbackTest") { const State = { - kStopped: "stopped", - kRollbackOps: "doing operations that will be rolled back", - kSyncSourceOps: "doing operations on sync source", - kSteadyStateOps: "doing operations during steady state replication", + kStopped: "kStopped", + kRollbackOps: "kRollbackOps", + // DEPRECATED: Remove this line after TIG-680. + kSyncSourceOps: "kSyncSourceOpsBeforeRollback", + kSyncSourceOpsBeforeRollback: "kSyncSourceOpsBeforeRollback", + kSyncSourceOpsDuringRollback: "kSyncSourceOpsDuringRollback", + kSteadyStateOps: "kSteadyStateOps", }; const AcceptableTransitions = { [State.kStopped]: [], - [State.kRollbackOps]: [State.kSyncSourceOps], - [State.kSyncSourceOps]: [State.kSteadyStateOps], + [State.kRollbackOps]: [State.kSyncSourceOpsBeforeRollback], + // DEPRECATED: remove transition to State.kSteadyStateOps after TIG-680. + [State.kSyncSourceOpsBeforeRollback]: + [State.kSyncSourceOpsDuringRollback, State.kSteadyStateOps], + [State.kSyncSourceOpsDuringRollback]: [State.kSteadyStateOps], [State.kSteadyStateOps]: [State.kStopped, State.kRollbackOps], }; @@ -37,7 +42,9 @@ function RollbackTest(name = "RollbackTest") { let curPrimary; let curSecondary; let arbiter; - let curState; + + let curState = State.kSteadyStateOps; + let lastRBID; // Do more complicated instantiation in this init() function. This reduces the amount of code // run at the class level (i.e. outside of any functions) and improves readability. @@ -62,18 +69,36 @@ function RollbackTest(name = "RollbackTest") { // Wait for the primary to be ready. const actualPrimary = rst.getPrimary(); - assert.eq(actualPrimary, curPrimary, 'The ReplSetTest.node[0] is not the primary'); + assert.eq(actualPrimary, curPrimary, "ReplSetTest.node[0] is not the primary"); - curState = State.kSteadyStateOps; + lastRBID = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid; })(); + function checkDataConsistency() { + assert.eq(curState, + State.kSteadyStateOps, + "Not in kSteadyStateOps state, cannot check data consistency"); + const name = rst.name; + rst.checkOplogs(name); + rst.checkReplicatedDataHashes(name); + // TODO: SERVER-31920 run validate. + } + + function log(msg, important = false) { + if (important) { + jsTestLog(`[${name}] ${msg}`); + } else { + print(`[${name}] ${msg}`); + } + } + /** * return whether the cluster can transition from the current State to `newState`. * @private */ function transitionIfAllowed(newState) { if (AcceptableTransitions[curState].includes(newState)) { - jsTestLog(`[${name}] Transitioning to: "${newState}"`); + log(`Transitioning to: "${newState}"`, true); curState = newState; } else { // Transitioning to a disallowed State is likely a bug in the code, so we throw an @@ -85,31 +110,45 @@ function RollbackTest(name = "RollbackTest") { /** * Transition from a rollback state to a steady state. Operations applied in this phase will * be replicated to all nodes and should not be rolled back. - * - * @param waitForRollback specify whether to wait for rollback to finish before returning. */ - this.transitionToSteadyStateOperations = function( - {waitForRollback: waitForRollback = true} = {}) { - transitionIfAllowed(State.kSteadyStateOps); + this.transitionToSteadyStateOperations = function() { - const originalRBID = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid; - - // Connect the secondary to everything else so it'll go into rollback. + // Ensure the secondary is connected. It may already have been connected from a previous + // stage. + log(`Ensuring the secondary ${curSecondary.host} is connected to the other nodes`); curSecondary.reconnect([curPrimary, arbiter]); - if (waitForRollback) { - jsTestLog(`[${name}] Waiting for rollback to complete`); - assert.soonNoExcept(() => { + log("Waiting for rollback to complete", true); + let rbid = -1; + assert.soon(() => { + try { + rbid = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid; + } catch (e) { // Command can fail when sync source is being cleared. - const rbid = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid; - return rbid === originalRBID + 1; - }, 'Timed out waiting for RBID to increment'); + } + // Fail early if the rbid is greater than lastRBID+1. + assert.lte(rbid, + lastRBID + 1, + `RBID is too large. current RBID: ${rbid}, last RBID: ${lastRBID}`); - rst.awaitSecondaryNodes(); - rst.awaitReplication(); + return rbid === lastRBID + 1; - jsTestLog(`[${name}] Rollback completed`); - } + }, "Timed out waiting for RBID to increment"); + + rst.awaitSecondaryNodes(); + rst.awaitReplication(); + + log("Rollback and awaitReplication completed", true); + + // We call transition to steady state ops after awaiting replication has finished, + // otherwise it could be confusing to see operations being replicated when we're already + // in rollback complete state. + transitionIfAllowed(State.kSteadyStateOps); + + // After the previous rollback (if any) has completed and await replication has finished, + // the replica set should be in a consistent and "fresh" state. We now prepare for the next + // rollback. + checkDataConsistency(); return curPrimary; }; @@ -119,15 +158,17 @@ function RollbackTest(name = "RollbackTest") { * its operations will eventually be rolled back. */ this.transitionToRollbackOperations = function() { - transitionIfAllowed(State.kRollbackOps); - // Ensure previous operations are replicated. The current secondary will be used as the sync // source later on, so it must be up-to-date to prevent any previous operations from being // rolled back. + rst.awaitSecondaryNodes(); rst.awaitReplication(); + transitionIfAllowed(State.kRollbackOps); + // Disconnect the current primary from the secondary so operations on it will eventually be // rolled back. But do not disconnect it from the arbiter so it can stay as the primary. + log(`Isolating the primary ${curPrimary.host} from the secondary ${curSecondary.host}`); curPrimary.disconnect([curSecondary]); return curPrimary; @@ -139,46 +180,90 @@ function RollbackTest(name = "RollbackTest") { * primary so that that optimes diverge and previous operations on the old primary will be * rolled back. */ - this.transitionToSyncSourceOperations = function() { - transitionIfAllowed(State.kSyncSourceOps); + this.transitionToSyncSourceOperationsBeforeRollback = function() { + transitionIfAllowed(State.kSyncSourceOpsBeforeRollback); + + // Insert one document to ensure rollback will not be skipped. + let dbName = "EnsureThereIsAtLeastOneOperationToRollback"; + assert.writeOK(curPrimary.getDB(dbName).ensureRollback.insert( + {thisDocument: 'is inserted to ensure rollback is not skipped'})); - // Isolate the current primary from everything so it steps down. + log(`Isolating the primary ${curPrimary.host} so it will step down`); curPrimary.disconnect([curSecondary, arbiter]); - // Wait for the primary to step down. + log(`Waiting for the primary ${curPrimary.host} to step down`); + try { + // The stepdown freeze period is short because the node is disconnected from + // the rest of the replica set, so it physically can't become the primary. + curPrimary.adminCommand({replSetStepDown: 1, force: true}); + } catch (e) { + // Stepdown may fail if the node has already started stepping down. + print('Caught exception from replSetStepDown: ' + e); + } + waitForState(curPrimary, ReplSetTest.State.SECONDARY); - // Reconnect the secondary to the arbiter so it can be elected. + log(`Reconnecting the secondary ${curSecondary.host} to the arbiter so it can be elected`); curSecondary.reconnect([arbiter]); - // Wait for the old secondary to become the new primary, which will be used as the sync - // source later on when the old primary rolls back. + log(`Waiting for the new primary ${curSecondary.host} to be elected`); + if (rst.getReplSetConfig().protocolVersion > 0) { + assert.soonNoExcept(() => { + const res = curSecondary.adminCommand({replSetStepUp: 1}); + return res.ok; + }); + } + const newPrimary = rst.getPrimary(); // As a sanity check, ensure the new primary is the old secondary. The opposite scenario // should never be possible with 2 electable nodes and the sequence of operations thus far. assert.eq(newPrimary, curSecondary, "Did not elect a new node as primary"); - - // Add a sleep and a dummy write to ensure the new primary has an optime greater than - // the last optime on the node that will undergo rollback. This greater optime ensures that - // the new primary is eligible to become a sync source in pv0. - sleep(1000); - var dbName = "ensureEligiblePV0"; - assert.writeOK(newPrimary.getDB(dbName).testColl.insert({id: 0})); + log(`Elected the old secondary ${newPrimary.host} as the new primary`); + + if (rst.getReplSetConfig().protocolVersion === 0) { + // Add a sleep and a dummy write to ensure the new primary has an optime greater than + // the last optime on the node that will undergo rollback. This greater optime ensures + // that the new primary is eligible to become a sync source in pv0. + sleep(1000); + dbName = "ensureEligiblePV0"; + assert.writeOK(newPrimary.getDB(dbName).testColl.insert({id: 0})); + } // The old primary is the new secondary; the old secondary just got elected as the new // primary, so we update the topology to reflect this change. curSecondary = curPrimary; curPrimary = newPrimary; + lastRBID = assert.commandWorked(curSecondary.adminCommand("replSetGetRBID")).rbid; + + return curPrimary; + }; + + // DEPRECATED: remove this line after TIG-680. + this.transitionToSyncSourceOperations = this.transitionToSyncSourceOperationsBeforeRollback; + + /** + * Transition to the third stage of rollback testing, where we reconnect the rollback node so + * it will start rolling back. + * + * Note that there is no guarantee that operations performed on the sync source while in this + * state will actually occur *during* the rollback process. They may happen before the rollback + * is finished or after the rollback is done. We provide this state, though, as an attempt to + * provide a way to test this behavior, even if it's non-deterministic. + */ + this.transitionToSyncSourceOperationsDuringRollback = function() { + transitionIfAllowed(State.kSyncSourceOpsDuringRollback); + + log(`Reconnecting the secondary ${curSecondary.host} so it'll go into rollback`); + curSecondary.reconnect([curPrimary, arbiter]); + return curPrimary; }; this.stop = function() { + checkDataConsistency(); transitionIfAllowed(State.kStopped); - const name = rst.name; - rst.checkOplogs(name); - rst.checkReplicatedDataHashes(name); return rst.stopSet(); }; diff --git a/jstests/replsets/rollback_all_op_types.js b/jstests/replsets/rollback_all_op_types.js index cb1020dc4ad..828b3276786 100644 --- a/jstests/replsets/rollback_all_op_types.js +++ b/jstests/replsets/rollback_all_op_types.js @@ -354,8 +354,9 @@ RollbackOps(rollbackNode); // Wait for rollback to finish. - rollbackTest.transitionToSyncSourceOperations(); - rollbackTest.transitionToSteadyStateOperations({waitForRollback: true}); + rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); + rollbackTest.transitionToSyncSourceOperationsDuringRollback(); + rollbackTest.transitionToSteadyStateOperations(); // Check the replica set. rollbackTest.stop(); diff --git a/jstests/replsets/rollback_collmods.js b/jstests/replsets/rollback_collmods.js index e6db187d6b4..faa2c09194c 100644 --- a/jstests/replsets/rollback_collmods.js +++ b/jstests/replsets/rollback_collmods.js @@ -92,12 +92,13 @@ printCollectionOptions(rollbackTest, "before branch"); RollbackOps(rollbackNode); - rollbackTest.transitionToSyncSourceOperations(); + rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); printCollectionOptions(rollbackTest, "before rollback"); // No ops on the sync source. // Wait for rollback to finish. - rollbackTest.transitionToSteadyStateOperations({waitForRollback: true}); + rollbackTest.transitionToSyncSourceOperationsDuringRollback(); + rollbackTest.transitionToSteadyStateOperations(); printCollectionOptions(rollbackTest, "after rollback"); SteadyStateOps(rollbackTest.getPrimary()); diff --git a/jstests/replsets/rollback_drop_database.js b/jstests/replsets/rollback_drop_database.js index 7d52c8a67ef..b48774efdd3 100644 --- a/jstests/replsets/rollback_drop_database.js +++ b/jstests/replsets/rollback_drop_database.js @@ -56,13 +56,14 @@ jsTestLog("Database " + oldDbName + " successfully dropped on primary node " + rollbackNode.host); - rollbackTest.transitionToSyncSourceOperations(); + rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); // Perform an insert on another database while interfacing with the new primary. // This is the sync source's divergent oplog entry. assert.writeOK(syncSourceNode.getDB(newDbName)["afterRollback"].insert({"num": 2})); - rollbackTest.transitionToSteadyStateOperations({waitForRollback: true}); + rollbackTest.transitionToSyncSourceOperationsDuringRollback(); + rollbackTest.transitionToSteadyStateOperations(); rollbackTest.stop(); -})();
\ No newline at end of file +})(); diff --git a/jstests/replsets/rollback_drop_index_after_rename.js b/jstests/replsets/rollback_drop_index_after_rename.js index e3b965c1c9b..efeffcdbf68 100644 --- a/jstests/replsets/rollback_drop_index_after_rename.js +++ b/jstests/replsets/rollback_drop_index_after_rename.js @@ -49,11 +49,10 @@ let rollbackNode = rollbackTest.transitionToRollbackOperations(); RollbackOps(rollbackNode); - // No sync source ops. - rollbackTest.transitionToSyncSourceOperations(); - // Wait for rollback to finish. - rollbackTest.transitionToSteadyStateOperations({waitForRollback: true}); + rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); + rollbackTest.transitionToSyncSourceOperationsDuringRollback(); + rollbackTest.transitionToSteadyStateOperations(); rollbackTest.stop(); })(); diff --git a/jstests/replsets/rollback_during_downgrade.js b/jstests/replsets/rollback_during_downgrade.js index 77733a1f1b9..ccacb2b73f0 100644 --- a/jstests/replsets/rollback_during_downgrade.js +++ b/jstests/replsets/rollback_during_downgrade.js @@ -71,7 +71,7 @@ assert.commandWorked(rollbackNode.getDB(dbName)['tToDropIndexes'].dropIndexes()); // ----------------- Begins running operations only on the sync source node --------------- - rollbackTest.transitionToSyncSourceOperations(); + rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); // Fake removing the UUID on the sync source, to test when neither node has a UUID for // completeness. @@ -82,7 +82,8 @@ })); // ----------------- Allows rollback to occur and checks for consistency ------------------ - rollbackTest.transitionToSteadyStateOperations({waitForRollback: true}); + rollbackTest.transitionToSyncSourceOperationsDuringRollback(); + rollbackTest.transitionToSteadyStateOperations(); waitForDowngradeToFinish(); diff --git a/jstests/replsets/rollback_during_upgrade.js b/jstests/replsets/rollback_during_upgrade.js index 66e7c8d2bdb..739908a6c73 100644 --- a/jstests/replsets/rollback_during_upgrade.js +++ b/jstests/replsets/rollback_during_upgrade.js @@ -88,7 +88,7 @@ })); // ----------------- Begins running operations only on the sync source node --------------- - rollbackTest.transitionToSyncSourceOperations(); + rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); // Fake giving the collections UUIDs to simulate an upgrade on the sync source. // The rollback test fixture only has two data bearing nodes so we cannot run an upgrade using @@ -103,8 +103,8 @@ })); // ----------------- Allows rollback to occur and checks for consistency ------------------ - rollbackTest.transitionToSteadyStateOperations({waitForRollback: true}); - + rollbackTest.transitionToSyncSourceOperationsDuringRollback(); + rollbackTest.transitionToSteadyStateOperations(); waitForUpgradeToFinish(); rollbackTest.stop(); diff --git a/jstests/replsets/rollback_rename_collection_on_sync_source.js b/jstests/replsets/rollback_rename_collection_on_sync_source.js index dfb24b68050..0f781eb6b8d 100644 --- a/jstests/replsets/rollback_rename_collection_on_sync_source.js +++ b/jstests/replsets/rollback_rename_collection_on_sync_source.js @@ -39,11 +39,12 @@ let rollbackNode = rollbackTest.transitionToRollbackOperations(); RollbackOps(rollbackNode); - let syncSourceNode = rollbackTest.transitionToSyncSourceOperations(); + let syncSourceNode = rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); SyncSourceOps(syncSourceNode); // Wait for rollback to finish. - rollbackTest.transitionToSteadyStateOperations({waitForRollback: true}); + rollbackTest.transitionToSyncSourceOperationsDuringRollback(); + rollbackTest.transitionToSteadyStateOperations(); // Check the replica set. rollbackTest.stop(); |