From f7bb498a2544a9b513d6d361c0f6950520d82846 Mon Sep 17 00:00:00 2001 From: Janna Golden Date: Mon, 7 Oct 2019 20:18:45 +0000 Subject: SERVER-36222 Wait for rollback is racy in shard_identity_rollback.js (cherry picked from commit f14c26dfc3975f0c98af26573ab91b5d9c09ee71) --- jstests/sharding/shard_identity_rollback.js | 34 ++++++++++++----------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/jstests/sharding/shard_identity_rollback.js b/jstests/sharding/shard_identity_rollback.js index b0a3f9b891c..30478093e2c 100644 --- a/jstests/sharding/shard_identity_rollback.js +++ b/jstests/sharding/shard_identity_rollback.js @@ -77,30 +77,21 @@ var newPriConn = replTest.getPrimary(); assert.neq(priConn, newPriConn); assert.writeOK(newPriConn.getDB('test').foo.insert({a: 1}, {writeConcern: {w: 'majority'}})); -// Restart the original primary so it triggers a rollback of the shardIdentity insert. +// Restart the original primary so it triggers a rollback of the shardIdentity insert. Pass +// {waitForConnect : false} to avoid a race condition between the node crashing (which we expect) +// and waiting to be able to connect to the node. jsTest.log("Restarting original primary"); -priConn = replTest.restart(priConn); +priConn = replTest.start(priConn, {waitForConnect: false}, true); // Wait until we cannot create a connection to the former primary, which indicates that it must // have shut itself down during the rollback. jsTest.log("Waiting for original primary to rollback and shut down"); -assert.soon( - function() { - try { - var newConn = new Mongo(priConn.host); - return false; - } catch (x) { - return true; - } - }, - function() { - var oldPriOplog = priConn.getDB('local').oplog.rs.find().sort({$natural: -1}).toArray(); - var newPriOplog = newPriConn.getDB('local').oplog.rs.find().sort({$natural: -1}).toArray(); - return "timed out waiting for original primary to shut down after rollback. " + - "Old primary oplog: " + tojson(oldPriOplog) + - "; new primary oplog: " + tojson(newPriOplog); - }, - 90000); +// Wait until the node shuts itself down during the rollback. We will hit the first assertion if +// we rollback using 'recoverToStableTimestamp' and the second if using 'rollbackViaRefetch'. +assert.soon(() => { + return (rawMongoProgramOutput().indexOf("Fatal Assertion 50712") !== -1 || + rawMongoProgramOutput().indexOf("Fatal Assertion 40498") !== -1); +}); // Restart the original primary again. This time, the shardIdentity document should already be // rolled back, so there shouldn't be any rollback and the node should stay online. @@ -112,7 +103,10 @@ try { } catch (e) { // expected } -priConn = replTest.restart(priConn, {shardsvr: ''}); +// Since we pass "restart: true" here, the node will start with the same options as above unless +// specified. We do want to wait to be able to connect to the node here however, so we need to pass +// {waitForConnect: true}. +priConn = replTest.start(priConn.nodeId, {shardsvr: '', waitForConnect: true}, true); priConn.setSlaveOk(); // Wait for the old primary to replicate the document that was written to the new primary while -- cgit v1.2.1