diff options
author | Robert Guo <robert.guo@10gen.com> | 2017-04-20 10:58:37 -0400 |
---|---|---|
committer | Robert Guo <robert.guo@10gen.com> | 2017-04-20 10:58:57 -0400 |
commit | c08590a6ac9dc54c9d910822d47ea17140b56f89 (patch) | |
tree | 9a6986057f4453f858fac87d43b7435e56f5e807 /jstests | |
parent | fac33fe5a6814169c9c6131d80f1b325c74647da (diff) | |
download | mongo-c08590a6ac9dc54c9d910822d47ea17140b56f89.tar.gz |
Revert "SERVER-26848 Exit catchup mode when not syncing more data."
This reverts commit d0c851e2f4bfea514e22c97af1838640d2849a8c.
Diffstat (limited to 'jstests')
-rw-r--r-- | jstests/multiVersion/downgrade_replset.js | 65 | ||||
-rw-r--r-- | jstests/multiVersion/initialsync.js | 9 | ||||
-rw-r--r-- | jstests/replsets/catchup.js | 187 | ||||
-rw-r--r-- | jstests/replsets/rslib.js | 1 |
4 files changed, 103 insertions, 159 deletions
diff --git a/jstests/multiVersion/downgrade_replset.js b/jstests/multiVersion/downgrade_replset.js index 022471410a1..658b35813a2 100644 --- a/jstests/multiVersion/downgrade_replset.js +++ b/jstests/multiVersion/downgrade_replset.js @@ -14,45 +14,38 @@ var nodes = { n3: {binVersion: newVersion} }; -function runDowngradeTest(protocolVersion) { - var rst = new ReplSetTest({name: name, nodes: nodes}); - rst.startSet(); - var replSetConfig = rst.getReplSetConfig(); - replSetConfig.protocolVersion = protocolVersion; - // Hard-code catchup timeout to be compatible with 3.4 - replSetConfig.settings = {catchUpTimeoutMillis: 2000}; - rst.initiate(replSetConfig); - - var primary = rst.getPrimary(); - var coll = "test.foo"; - - jsTest.log("Inserting documents into collection."); - for (var i = 0; i < 10; i++) { - primary.getCollection(coll).insert({_id: i, str: "hello world"}); - } +var rst = new ReplSetTest({name: name, nodes: nodes}); +rst.startSet(); +var replSetConfig = rst.getReplSetConfig(); +replSetConfig.protocolVersion = 0; +rst.initiate(replSetConfig); + +var primary = rst.getPrimary(); +var coll = "test.foo"; + +jsTest.log("Inserting documents into collection."); +for (var i = 0; i < 10; i++) { + primary.getCollection(coll).insert({_id: i, str: "hello world"}); +} - function insertDocuments(rsURL, coll) { - var coll = new Mongo(rsURL).getCollection(coll); - var count = 10; - while (!isFinished()) { - assert.writeOK(coll.insert({_id: count, str: "hello world"})); - count++; - } +function insertDocuments(rsURL, coll) { + var coll = new Mongo(rsURL).getCollection(coll); + var count = 10; + while (!isFinished()) { + assert.writeOK(coll.insert({_id: count, str: "hello world"})); + count++; } +} - jsTest.log("Starting parallel operations during downgrade.."); - var joinFindInsert = startParallelOps(primary, insertDocuments, [rst.getURL(), coll]); - - jsTest.log("Downgrading replica set.."); - rst.upgradeSet({binVersion: oldVersion}); - jsTest.log("Downgrade complete."); +jsTest.log("Starting parallel operations during downgrade.."); +var joinFindInsert = startParallelOps(primary, insertDocuments, [rst.getURL(), coll]); - primary = rst.getPrimary(); - printjson(rst.status()); +jsTest.log("Downgrading replica set.."); +rst.upgradeSet({binVersion: oldVersion}); +jsTest.log("Downgrade complete."); - joinFindInsert(); - rst.stopSet(); -} +primary = rst.getPrimary(); +printjson(rst.status()); -runDowngradeTest(0); -runDowngradeTest(1); +joinFindInsert(); +rst.stopSet(); diff --git a/jstests/multiVersion/initialsync.js b/jstests/multiVersion/initialsync.js index a36d538a6f8..d8c1d629fd0 100644 --- a/jstests/multiVersion/initialsync.js +++ b/jstests/multiVersion/initialsync.js @@ -7,15 +7,13 @@ var newVersion = "latest"; var name = "multiversioninitsync"; -var multitest = function(replSetVersion, newNodeVersion, configSettings) { +var multitest = function(replSetVersion, newNodeVersion) { var nodes = {n1: {binVersion: replSetVersion}, n2: {binVersion: replSetVersion}}; print("Start up a two-node " + replSetVersion + " replica set."); var rst = new ReplSetTest({name: name, nodes: nodes}); rst.startSet(); - var conf = rst.getReplSetConfig(); - conf.settings = configSettings; - rst.initiate(conf); + rst.initiate(); // Wait for a primary node. var primary = rst.getPrimary(); @@ -52,5 +50,4 @@ multitest(oldVersion, newVersion); // Old Secondary is synced from a "latest" // version ReplSet. // ***************************************** -// Hard-code catchup timeout. The default timeout on 3.5 is -1, which is invalid on 3.4. -multitest(newVersion, oldVersion, {catchUpTimeoutMillis: 2000}); +multitest(newVersion, oldVersion); diff --git a/jstests/replsets/catchup.js b/jstests/replsets/catchup.js index 51632379463..542ad51c723 100644 --- a/jstests/replsets/catchup.js +++ b/jstests/replsets/catchup.js @@ -12,7 +12,6 @@ rst.startSet(); var conf = rst.getReplSetConfig(); - conf.members[2].priority = 0; conf.settings = { heartbeatIntervalMillis: 500, electionTimeoutMillis: 10000, @@ -35,7 +34,7 @@ node.adminCommand(verbosity); }); - function stepUpNode(node) { + function stepUp(node) { assert.soon(function() { node.adminCommand({replSetStepUp: 1}); return node.adminCommand('replSetGetStatus').myState == ReplSetTest.State.PRIMARY; @@ -44,6 +43,12 @@ return node; } + function doWrites(node) { + for (var i = 0; i < 3; i++) { + assert.writeOK(node.getDB("test").foo.insert({x: i})); + } + } + function checkOpInOplog(node, op, count) { node.getDB("admin").getMongo().setSlaveOk(); var oplog = node.getDB("local")['oplog.rs']; @@ -51,148 +56,98 @@ assert.eq(oplog.count(op), count, "op: " + tojson(op) + ", oplog: " + tojson(oplogArray)); } - // Stop replication on secondaries, do writes and step up one of the secondaries. - // - // The old primary has extra writes that are not replicated to the other nodes yet, - // but the new primary steps up, getting the vote from the the third node "voter". - function stopRelicationAndEnforceNewPrimaryToCatchUp() { - // Write documents that cannot be replicated to secondaries in time. - var oldSecondaries = rst.getSecondaries(); - var oldPrimary = rst.getPrimary(); - stopServerReplication(oldSecondaries); - for (var i = 0; i < 3; i++) { - assert.writeOK(oldPrimary.getDB("test").foo.insert({x: i})); + function isEarlierTimestamp(ts1, ts2) { + if (ts1.getTime() == ts2.getTime()) { + return ts1.getInc() < ts2.getInc(); } - var latestOpOnOldPrimary = getLatestOp(oldPrimary); - // New primary wins immediately, but needs to catch up. - var newPrimary = stepUpNode(oldSecondaries[0]); - rst.awaitNodesAgreeOnPrimary(); - var latestOpOnNewPrimary = getLatestOp(newPrimary); - // Check this node is not writable. - assert.eq(newPrimary.getDB("test").isMaster().ismaster, false); - - return { - oldSecondaries: oldSecondaries, - oldPrimary: oldPrimary, - newPrimary: newPrimary, - voter: oldSecondaries[1], - latestOpOnOldPrimary: latestOpOnOldPrimary, - latestOpOnNewPrimary: latestOpOnNewPrimary - }; - } - - function reconfigCatchUpTimeoutMillis(timeout) { - // Reconnect all nodes to make sure reconfig succeeds. - rst.nodes.forEach(reconnect); - // Reconfigure replicaset to decrease catchup timeout - conf = rst.getReplSetConfigFromNode(); - conf.version++; - conf.settings.catchUpTimeoutMillis = timeout; - reconfig(rst, conf); - rst.awaitReplication(); - rst.awaitNodesAgreeOnPrimary(); + return ts1.getTime() < ts2.getTime(); } - rst.awaitReplication(); + rst.awaitReplication(ReplSetTest.kDefaultTimeoutMS, ReplSetTest.OpTimeType.LAST_DURABLE); - jsTest.log("Case 1: The primary is up-to-date after refreshing heartbeats."); + jsTest.log("Case 1: The primary is up-to-date after freshness scan."); // Should complete transition to primary immediately. - var newPrimary = stepUpNode(rst.getSecondary()); + var newPrimary = stepUp(rst.getSecondary()); rst.awaitNodesAgreeOnPrimary(); // Should win an election and finish the transition very quickly. assert.eq(newPrimary, rst.getPrimary()); - rst.awaitReplication(); + rst.awaitReplication(ReplSetTest.kDefaultTimeoutMS, ReplSetTest.OpTimeType.LAST_DURABLE); jsTest.log("Case 2: The primary needs to catch up, succeeds in time."); - var stepUpResults = stopRelicationAndEnforceNewPrimaryToCatchUp(); - + // Write documents that cannot be replicated to secondaries in time. + var originalSecondaries = rst.getSecondaries(); + stopServerReplication(originalSecondaries); + doWrites(rst.getPrimary()); + var latestOp = getLatestOp(rst.getPrimary()); + // New primary wins immediately, but needs to catch up. + newPrimary = stepUp(rst.getSecondary()); + rst.awaitNodesAgreeOnPrimary(); + // Check this node is not writable. + assert.eq(newPrimary.getDB("test").isMaster().ismaster, false); // Disable fail point to allow replication. - restartServerReplication(stepUpResults.oldSecondaries); + restartServerReplication(originalSecondaries); // getPrimary() blocks until the primary finishes drain mode. - assert.eq(stepUpResults.newPrimary, rst.getPrimary()); + assert.eq(newPrimary, rst.getPrimary()); // Wait for all secondaries to catch up rst.awaitReplication(); // Check the latest op on old primary is preserved on the new one. - checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 1); - rst.awaitReplication(); + checkOpInOplog(newPrimary, latestOp, 1); + rst.awaitReplication(ReplSetTest.kDefaultTimeoutMS, ReplSetTest.OpTimeType.LAST_DURABLE); jsTest.log("Case 3: The primary needs to catch up, but has to change sync source to catch up."); - stepUpResults = stopRelicationAndEnforceNewPrimaryToCatchUp(); - - // Disable fail point on the voter. Wait until it catches up with the old primary. - restartServerReplication(stepUpResults.voter); - assert.commandWorked( - stepUpResults.voter.adminCommand({replSetSyncFrom: stepUpResults.oldPrimary.host})); - awaitOpTime(stepUpResults.voter, stepUpResults.latestOpOnOldPrimary.ts); + // Write documents that cannot be replicated to secondaries in time. + stopServerReplication(rst.getSecondaries()); + doWrites(rst.getPrimary()); + var oldPrimary = rst.getPrimary(); + originalSecondaries = rst.getSecondaries(); + latestOp = getLatestOp(oldPrimary); + newPrimary = stepUp(originalSecondaries[0]); + rst.awaitNodesAgreeOnPrimary(); + // Disable fail point on one of the other secondaries. + // Wait until it catches up with the old primary. + restartServerReplication(originalSecondaries[1]); + assert.commandWorked(originalSecondaries[1].adminCommand({replSetSyncFrom: oldPrimary.host})); + awaitOpTime(originalSecondaries[1], latestOp.ts); // Disconnect the new primary and the old one. - stepUpResults.oldPrimary.disconnect(stepUpResults.newPrimary); + oldPrimary.disconnect(newPrimary); // Disable the failpoint, the new primary should sync from the other secondary. - restartServerReplication(stepUpResults.newPrimary); - assert.eq(stepUpResults.newPrimary, rst.getPrimary()); - checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 1); + restartServerReplication(newPrimary); + assert.eq(newPrimary, rst.getPrimary()); + checkOpInOplog(newPrimary, latestOp, 1); // Restore the broken connection - stepUpResults.oldPrimary.reconnect(stepUpResults.newPrimary); - rst.awaitReplication(); + oldPrimary.reconnect(newPrimary); + rst.awaitReplication(ReplSetTest.kDefaultTimeoutMS, ReplSetTest.OpTimeType.LAST_DURABLE); jsTest.log("Case 4: The primary needs to catch up, fails due to timeout."); - reconfigCatchUpTimeoutMillis(10 * 1000); - - stepUpResults = stopRelicationAndEnforceNewPrimaryToCatchUp(); - // Wait until the new primary completes the transition to primary and writes a no-op. - checkLog.contains(stepUpResults.newPrimary, "Catchup timed out after becoming primary"); - restartServerReplication(stepUpResults.newPrimary); - assert.eq(stepUpResults.newPrimary, rst.getPrimary()); - - // Wait for the no-op "new primary" after winning an election, so that we know it has - // finished transition to primary. - assert.soon(function() { - return rs.compareOpTimes(stepUpResults.latestOpOnOldPrimary, - getLatestOp(stepUpResults.newPrimary)) < 0; - }); - // The extra oplog entries on the old primary are not replicated to the new one. - checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 0); - restartServerReplication(stepUpResults.voter); - rst.awaitReplication(); + // Reconfigure replicaset to decrease catchup timeout + conf = rst.getReplSetConfigFromNode(); + conf.version++; + conf.settings.catchUpTimeoutMillis = 10 * 1000; + reconfig(rst, conf); + rst.awaitReplication(ReplSetTest.kDefaultTimeoutMS, ReplSetTest.OpTimeType.LAST_DURABLE); + rst.awaitNodesAgreeOnPrimary(); - jsTest.log("Case 5: The primary needs to catch up with no timeout, then gets aborted."); - reconfigCatchUpTimeoutMillis(-1); - stepUpResults = stopRelicationAndEnforceNewPrimaryToCatchUp(); + // Write documents that cannot be replicated to secondaries in time. + originalSecondaries = rst.getSecondaries(); + stopServerReplication(originalSecondaries); + doWrites(rst.getPrimary()); + latestOp = getLatestOp(rst.getPrimary()); - // Abort catchup. - assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetAbortPrimaryCatchUp: 1})); + // New primary wins immediately, but needs to catch up. + newPrimary = stepUp(originalSecondaries[0]); + rst.awaitNodesAgreeOnPrimary(); + var latestOpOnNewPrimary = getLatestOp(newPrimary); + // Wait until the new primary completes the transition to primary and writes a no-op. + checkLog.contains(newPrimary, "Cannot catch up oplog after becoming primary"); + restartServerReplication(newPrimary); + assert.eq(newPrimary, rst.getPrimary()); // Wait for the no-op "new primary" after winning an election, so that we know it has // finished transition to primary. assert.soon(function() { - return rs.compareOpTimes(stepUpResults.latestOpOnOldPrimary, - getLatestOp(stepUpResults.newPrimary)) < 0; + return isEarlierTimestamp(latestOpOnNewPrimary.ts, getLatestOp(newPrimary).ts); }); // The extra oplog entries on the old primary are not replicated to the new one. - checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 0); - restartServerReplication(stepUpResults.oldSecondaries); - rst.awaitReplication(); - checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 0); - - // TODO: Uncomment case 6 when SERVER-28751 gets fixed. - // - // jsTest.log("Case 6: The primary needs to catch up with no timeout, but steps down."); - // var stepUpResults = stopRelicationAndEnforceNewPrimaryToCatchUp(); - - // // Step-down command should abort catchup. - // try { - // printjson(stepUpResults.newPrimary.adminCommand({replSetStepDown: 60})); - // } catch (e) { - // print(e); - // } - // // Rename the primary. - // var steppedDownPrimary = stepUpResults.newPrimary; - // var newPrimary = rst.getPrimary(); - // assert.neq(newPrimary, steppedDownPrimary); - - // // Enable data replication on the stepped down primary and make sure it syncs old writes. - // rst.nodes.forEach(reconnect); - // restartServerReplication(stepUpResults.oldSecondaries); - // rst.awaitReplication(); - // checkOpInOplog(steppedDownPrimary, stepUpResults.latestOpOnOldPrimary, 1); - + checkOpInOplog(newPrimary, latestOp, 0); + restartServerReplication(originalSecondaries[1]); })(); diff --git a/jstests/replsets/rslib.js b/jstests/replsets/rslib.js index 5911723d717..1471824bd8f 100644 --- a/jstests/replsets/rslib.js +++ b/jstests/replsets/rslib.js @@ -162,7 +162,6 @@ var getLastOpTime; if (!isNetworkError(e)) { throw e; } - print("Calling replSetReconfig failed. " + tojson(e)); } var master = rs.getPrimary().getDB("admin"); |