diff options
author | clang-format-7.0.1 <adam.martin@10gen.com> | 2019-07-26 18:20:35 -0400 |
---|---|---|
committer | ADAM David Alan Martin <adam.martin@10gen.com> | 2019-07-27 11:02:23 -0400 |
commit | 134a4083953270e8a11430395357fb70a29047ad (patch) | |
tree | dd428e1230e31d92b20b393dfdc17ffe7fa79cb6 /jstests/replsets/catchup.js | |
parent | 1e46b5049003f427047e723ea5fab15b5a9253ca (diff) | |
download | mongo-134a4083953270e8a11430395357fb70a29047ad.tar.gz |
SERVER-41772 Apply clang-format 7.0.1 to the codebase
Diffstat (limited to 'jstests/replsets/catchup.js')
-rw-r--r-- | jstests/replsets/catchup.js | 422 |
1 files changed, 210 insertions, 212 deletions
diff --git a/jstests/replsets/catchup.js b/jstests/replsets/catchup.js index 78ec12ab888..772a8dfa3a4 100644 --- a/jstests/replsets/catchup.js +++ b/jstests/replsets/catchup.js @@ -1,221 +1,219 @@ // Test the catch-up behavior of new primaries. (function() { - "use strict"; - - load("jstests/libs/check_log.js"); - load("jstests/libs/write_concern_util.js"); - load("jstests/replsets/libs/election_metrics.js"); - load("jstests/replsets/rslib.js"); - - var name = "catch_up"; - var rst = new ReplSetTest({name: name, nodes: 3, useBridge: true, waitForKeys: true}); - - rst.startSet(); - var conf = rst.getReplSetConfig(); - conf.members[2].priority = 0; - conf.settings = { - heartbeatIntervalMillis: 500, - electionTimeoutMillis: 10000, - catchUpTimeoutMillis: 4 * 60 * 1000 - }; - rst.initiate(conf); - rst.awaitSecondaryNodes(); - - var primary = rst.getPrimary(); - var primaryColl = primary.getDB("test").coll; - - // Set verbosity for replication on all nodes. - var verbosity = { - "setParameter": 1, - "logComponentVerbosity": { - "replication": {"verbosity": 2}, - } - }; - rst.nodes.forEach(function(node) { - node.adminCommand(verbosity); - }); - - function stepUpNode(node) { - assert.soonNoExcept(function() { - assert.commandWorked(node.adminCommand({replSetStepUp: 1})); - rst.awaitNodesAgreeOnPrimary(rst.kDefaultTimeoutMS, rst.nodes, rst.getNodeId(node)); - return node.adminCommand('replSetGetStatus').myState == ReplSetTest.State.PRIMARY; - }, 'failed to step up node ' + node.host, rst.kDefaultTimeoutMS); - - return node; - } - - function checkOpInOplog(node, op, count) { - node.getDB("admin").getMongo().setSlaveOk(); - var oplog = node.getDB("local")['oplog.rs']; - var oplogArray = oplog.find().toArray(); - assert.eq(oplog.count(op), count, "op: " + tojson(op) + ", oplog: " + tojson(oplogArray)); +"use strict"; + +load("jstests/libs/check_log.js"); +load("jstests/libs/write_concern_util.js"); +load("jstests/replsets/libs/election_metrics.js"); +load("jstests/replsets/rslib.js"); + +var name = "catch_up"; +var rst = new ReplSetTest({name: name, nodes: 3, useBridge: true, waitForKeys: true}); + +rst.startSet(); +var conf = rst.getReplSetConfig(); +conf.members[2].priority = 0; +conf.settings = { + heartbeatIntervalMillis: 500, + electionTimeoutMillis: 10000, + catchUpTimeoutMillis: 4 * 60 * 1000 +}; +rst.initiate(conf); +rst.awaitSecondaryNodes(); + +var primary = rst.getPrimary(); +var primaryColl = primary.getDB("test").coll; + +// Set verbosity for replication on all nodes. +var verbosity = { + "setParameter": 1, + "logComponentVerbosity": { + "replication": {"verbosity": 2}, } - - // Stop replication on secondaries, do writes and step up one of the secondaries. - // - // The old primary has extra writes that are not replicated to the other nodes yet, - // but the new primary steps up, getting the vote from the the third node "voter". - function stopReplicationAndEnforceNewPrimaryToCatchUp() { - // Write documents that cannot be replicated to secondaries in time. - var oldSecondaries = rst.getSecondaries(); - var oldPrimary = rst.getPrimary(); - stopServerReplication(oldSecondaries); - for (var i = 0; i < 3; i++) { - assert.writeOK(oldPrimary.getDB("test").foo.insert({x: i})); - } - var latestOpOnOldPrimary = getLatestOp(oldPrimary); - - // New primary wins immediately, but needs to catch up. - var newPrimary = stepUpNode(oldSecondaries[0]); - var latestOpOnNewPrimary = getLatestOp(newPrimary); - // Check this node is not writable. - assert.eq(newPrimary.getDB("test").isMaster().ismaster, false); - - return { - oldSecondaries: oldSecondaries, - oldPrimary: oldPrimary, - newPrimary: newPrimary, - voter: oldSecondaries[1], - latestOpOnOldPrimary: latestOpOnOldPrimary, - latestOpOnNewPrimary: latestOpOnNewPrimary - }; - } - - function reconfigElectionAndCatchUpTimeout(electionTimeout, catchupTimeout) { - // Reconnect all nodes to make sure reconfig succeeds. - rst.nodes.forEach(reconnect); - // Reconfigure replica set to decrease catchup timeout. - var newConfig = rst.getReplSetConfigFromNode(); - newConfig.version++; - newConfig.settings.catchUpTimeoutMillis = catchupTimeout; - newConfig.settings.electionTimeoutMillis = electionTimeout; - reconfig(rst, newConfig); - rst.awaitReplication(); - rst.awaitNodesAgreeOnPrimary(); +}; +rst.nodes.forEach(function(node) { + node.adminCommand(verbosity); +}); + +function stepUpNode(node) { + assert.soonNoExcept(function() { + assert.commandWorked(node.adminCommand({replSetStepUp: 1})); + rst.awaitNodesAgreeOnPrimary(rst.kDefaultTimeoutMS, rst.nodes, rst.getNodeId(node)); + return node.adminCommand('replSetGetStatus').myState == ReplSetTest.State.PRIMARY; + }, 'failed to step up node ' + node.host, rst.kDefaultTimeoutMS); + + return node; +} + +function checkOpInOplog(node, op, count) { + node.getDB("admin").getMongo().setSlaveOk(); + var oplog = node.getDB("local")['oplog.rs']; + var oplogArray = oplog.find().toArray(); + assert.eq(oplog.count(op), count, "op: " + tojson(op) + ", oplog: " + tojson(oplogArray)); +} + +// Stop replication on secondaries, do writes and step up one of the secondaries. +// +// The old primary has extra writes that are not replicated to the other nodes yet, +// but the new primary steps up, getting the vote from the the third node "voter". +function stopReplicationAndEnforceNewPrimaryToCatchUp() { + // Write documents that cannot be replicated to secondaries in time. + var oldSecondaries = rst.getSecondaries(); + var oldPrimary = rst.getPrimary(); + stopServerReplication(oldSecondaries); + for (var i = 0; i < 3; i++) { + assert.writeOK(oldPrimary.getDB("test").foo.insert({x: i})); } + var latestOpOnOldPrimary = getLatestOp(oldPrimary); + + // New primary wins immediately, but needs to catch up. + var newPrimary = stepUpNode(oldSecondaries[0]); + var latestOpOnNewPrimary = getLatestOp(newPrimary); + // Check this node is not writable. + assert.eq(newPrimary.getDB("test").isMaster().ismaster, false); + + return { + oldSecondaries: oldSecondaries, + oldPrimary: oldPrimary, + newPrimary: newPrimary, + voter: oldSecondaries[1], + latestOpOnOldPrimary: latestOpOnOldPrimary, + latestOpOnNewPrimary: latestOpOnNewPrimary + }; +} - rst.awaitReplication(); - - jsTest.log("Case 1: The primary is up-to-date after refreshing heartbeats."); - // Should complete transition to primary immediately. - var newPrimary = stepUpNode(rst.getSecondary()); - // Should win an election and finish the transition very quickly. - assert.eq(newPrimary, rst.getPrimary()); - rst.awaitReplication(); - - jsTest.log("Case 2: The primary needs to catch up, succeeds in time."); - let initialNewPrimaryStatus = - assert.commandWorked(rst.getSecondaries()[0].adminCommand({serverStatus: 1})); - - var stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp(); - - // Disable fail point to allow replication. - restartServerReplication(stepUpResults.oldSecondaries); - // getPrimary() blocks until the primary finishes drain mode. - assert.eq(stepUpResults.newPrimary, rst.getPrimary()); - - // Check that the 'numCatchUps' field has been incremented in serverStatus. - let newNewPrimaryStatus = - assert.commandWorked(stepUpResults.newPrimary.adminCommand({serverStatus: 1})); - verifyServerStatusChange(initialNewPrimaryStatus.electionMetrics, - newNewPrimaryStatus.electionMetrics, - 'numCatchUps', - 1); - - // Wait for all secondaries to catch up - rst.awaitReplication(); - // Check the latest op on old primary is preserved on the new one. - checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 1); - rst.awaitReplication(); - - jsTest.log("Case 3: The primary needs to catch up, but has to change sync source to catch up."); - // Reconfig the election timeout to be longer than 1 minute so that the third node will no - // longer be blacklisted by the new primary if it happened to be at the beginning of the test. - reconfigElectionAndCatchUpTimeout(3 * 60 * 1000, conf.settings.catchUpTimeoutMillis); - - stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp(); - - // Disable fail point on the voter. Wait until it catches up with the old primary. - restartServerReplication(stepUpResults.voter); - assert.commandWorked( - stepUpResults.voter.adminCommand({replSetSyncFrom: stepUpResults.oldPrimary.host})); - // Wait until the new primary knows the last applied optime on the voter, so it will keep - // catching up after the old primary is disconnected. - assert.soon(function() { - var replSetStatus = - assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetGetStatus: 1})); - var voterStatus = replSetStatus.members.filter(m => m.name == stepUpResults.voter.host)[0]; - return rs.compareOpTimes(voterStatus.optime, stepUpResults.latestOpOnOldPrimary) == 0; - }); - // Disconnect the new primary and the old one. - stepUpResults.oldPrimary.disconnect(stepUpResults.newPrimary); - // Disable the failpoint, the new primary should sync from the other secondary. - restartServerReplication(stepUpResults.newPrimary); - assert.eq(stepUpResults.newPrimary, rst.getPrimary()); - checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 1); - // Restore the broken connection - stepUpResults.oldPrimary.reconnect(stepUpResults.newPrimary); - rst.awaitReplication(); - - jsTest.log("Case 4: The primary needs to catch up, fails due to timeout."); - // Reconfig to make the catchup timeout shorter. - reconfigElectionAndCatchUpTimeout(conf.settings.electionTimeoutMillis, 10 * 1000); - - stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp(); - // Wait until the new primary completes the transition to primary and writes a no-op. - checkLog.contains(stepUpResults.newPrimary, "Catchup timed out after becoming primary"); - restartServerReplication(stepUpResults.newPrimary); - assert.eq(stepUpResults.newPrimary, rst.getPrimary()); - - // Wait for the no-op "new primary" after winning an election, so that we know it has - // finished transition to primary. - assert.soon(function() { - return rs.compareOpTimes(stepUpResults.latestOpOnOldPrimary, - getLatestOp(stepUpResults.newPrimary)) < 0; - }); - // The extra oplog entries on the old primary are not replicated to the new one. - checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 0); - restartServerReplication(stepUpResults.voter); - rst.awaitReplication(); - - jsTest.log("Case 5: The primary needs to catch up with no timeout, then gets aborted."); - // Reconfig to make the catchup timeout infinite. - reconfigElectionAndCatchUpTimeout(conf.settings.electionTimeoutMillis, -1); - stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp(); - - // Abort catchup. - assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetAbortPrimaryCatchUp: 1})); - - // Wait for the no-op "new primary" after winning an election, so that we know it has - // finished transition to primary. - assert.soon(function() { - return rs.compareOpTimes(stepUpResults.latestOpOnOldPrimary, - getLatestOp(stepUpResults.newPrimary)) < 0; - }); - // The extra oplog entries on the old primary are not replicated to the new one. - checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 0); - restartServerReplication(stepUpResults.oldSecondaries); - rst.awaitReplication(); - checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 0); - - jsTest.log("Case 6: The primary needs to catch up with no timeout, but steps down."); - var stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp(); - - // Step-down command should abort catchup. - assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetStepDown: 60})); - - // Rename the primary. - var steppedDownPrimary = stepUpResults.newPrimary; - var newPrimary = rst.getPrimary(); - assert.neq(newPrimary, steppedDownPrimary); - - // Enable data replication on the stepped down primary and make sure it syncs old writes. +function reconfigElectionAndCatchUpTimeout(electionTimeout, catchupTimeout) { + // Reconnect all nodes to make sure reconfig succeeds. rst.nodes.forEach(reconnect); - restartServerReplication(stepUpResults.oldSecondaries); + // Reconfigure replica set to decrease catchup timeout. + var newConfig = rst.getReplSetConfigFromNode(); + newConfig.version++; + newConfig.settings.catchUpTimeoutMillis = catchupTimeout; + newConfig.settings.electionTimeoutMillis = electionTimeout; + reconfig(rst, newConfig); rst.awaitReplication(); - checkOpInOplog(steppedDownPrimary, stepUpResults.latestOpOnOldPrimary, 1); - - rst.stopSet(); + rst.awaitNodesAgreeOnPrimary(); +} + +rst.awaitReplication(); + +jsTest.log("Case 1: The primary is up-to-date after refreshing heartbeats."); +// Should complete transition to primary immediately. +var newPrimary = stepUpNode(rst.getSecondary()); +// Should win an election and finish the transition very quickly. +assert.eq(newPrimary, rst.getPrimary()); +rst.awaitReplication(); + +jsTest.log("Case 2: The primary needs to catch up, succeeds in time."); +let initialNewPrimaryStatus = + assert.commandWorked(rst.getSecondaries()[0].adminCommand({serverStatus: 1})); + +var stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp(); + +// Disable fail point to allow replication. +restartServerReplication(stepUpResults.oldSecondaries); +// getPrimary() blocks until the primary finishes drain mode. +assert.eq(stepUpResults.newPrimary, rst.getPrimary()); + +// Check that the 'numCatchUps' field has been incremented in serverStatus. +let newNewPrimaryStatus = + assert.commandWorked(stepUpResults.newPrimary.adminCommand({serverStatus: 1})); +verifyServerStatusChange( + initialNewPrimaryStatus.electionMetrics, newNewPrimaryStatus.electionMetrics, 'numCatchUps', 1); + +// Wait for all secondaries to catch up +rst.awaitReplication(); +// Check the latest op on old primary is preserved on the new one. +checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 1); +rst.awaitReplication(); + +jsTest.log("Case 3: The primary needs to catch up, but has to change sync source to catch up."); +// Reconfig the election timeout to be longer than 1 minute so that the third node will no +// longer be blacklisted by the new primary if it happened to be at the beginning of the test. +reconfigElectionAndCatchUpTimeout(3 * 60 * 1000, conf.settings.catchUpTimeoutMillis); + +stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp(); + +// Disable fail point on the voter. Wait until it catches up with the old primary. +restartServerReplication(stepUpResults.voter); +assert.commandWorked( + stepUpResults.voter.adminCommand({replSetSyncFrom: stepUpResults.oldPrimary.host})); +// Wait until the new primary knows the last applied optime on the voter, so it will keep +// catching up after the old primary is disconnected. +assert.soon(function() { + var replSetStatus = + assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetGetStatus: 1})); + var voterStatus = replSetStatus.members.filter(m => m.name == stepUpResults.voter.host)[0]; + return rs.compareOpTimes(voterStatus.optime, stepUpResults.latestOpOnOldPrimary) == 0; +}); +// Disconnect the new primary and the old one. +stepUpResults.oldPrimary.disconnect(stepUpResults.newPrimary); +// Disable the failpoint, the new primary should sync from the other secondary. +restartServerReplication(stepUpResults.newPrimary); +assert.eq(stepUpResults.newPrimary, rst.getPrimary()); +checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 1); +// Restore the broken connection +stepUpResults.oldPrimary.reconnect(stepUpResults.newPrimary); +rst.awaitReplication(); + +jsTest.log("Case 4: The primary needs to catch up, fails due to timeout."); +// Reconfig to make the catchup timeout shorter. +reconfigElectionAndCatchUpTimeout(conf.settings.electionTimeoutMillis, 10 * 1000); + +stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp(); +// Wait until the new primary completes the transition to primary and writes a no-op. +checkLog.contains(stepUpResults.newPrimary, "Catchup timed out after becoming primary"); +restartServerReplication(stepUpResults.newPrimary); +assert.eq(stepUpResults.newPrimary, rst.getPrimary()); + +// Wait for the no-op "new primary" after winning an election, so that we know it has +// finished transition to primary. +assert.soon(function() { + return rs.compareOpTimes(stepUpResults.latestOpOnOldPrimary, + getLatestOp(stepUpResults.newPrimary)) < 0; +}); +// The extra oplog entries on the old primary are not replicated to the new one. +checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 0); +restartServerReplication(stepUpResults.voter); +rst.awaitReplication(); + +jsTest.log("Case 5: The primary needs to catch up with no timeout, then gets aborted."); +// Reconfig to make the catchup timeout infinite. +reconfigElectionAndCatchUpTimeout(conf.settings.electionTimeoutMillis, -1); +stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp(); + +// Abort catchup. +assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetAbortPrimaryCatchUp: 1})); + +// Wait for the no-op "new primary" after winning an election, so that we know it has +// finished transition to primary. +assert.soon(function() { + return rs.compareOpTimes(stepUpResults.latestOpOnOldPrimary, + getLatestOp(stepUpResults.newPrimary)) < 0; +}); +// The extra oplog entries on the old primary are not replicated to the new one. +checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 0); +restartServerReplication(stepUpResults.oldSecondaries); +rst.awaitReplication(); +checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 0); + +jsTest.log("Case 6: The primary needs to catch up with no timeout, but steps down."); +var stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp(); + +// Step-down command should abort catchup. +assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetStepDown: 60})); + +// Rename the primary. +var steppedDownPrimary = stepUpResults.newPrimary; +var newPrimary = rst.getPrimary(); +assert.neq(newPrimary, steppedDownPrimary); + +// Enable data replication on the stepped down primary and make sure it syncs old writes. +rst.nodes.forEach(reconnect); +restartServerReplication(stepUpResults.oldSecondaries); +rst.awaitReplication(); +checkOpInOplog(steppedDownPrimary, stepUpResults.latestOpOnOldPrimary, 1); + +rst.stopSet(); })(); |