summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Judah Schvimer <judah@mongodb.com> 2016-05-24 10:20:44 -0400
committer: Judah Schvimer <judah@mongodb.com> 2016-09-06 10:00:43 -0400
commit: 9f4ee281fb0f2a0840e2b1ae5fd875bee2587ee1 (patch)
tree: 95767b97cd03804655cdd53cf3fcbbbd1111281a
parent: 693717b18d85ad76f7b373246ecddf61811d8bf2 (diff)
download: mongo-9f4ee281fb0f2a0840e2b1ae5fd875bee2587ee1.tar.gz
SERVER-24254 Wait for nodes to agree on primary in maxSyncSourceLagSecs.js and chaining_removal.js
(cherry picked from commit 60f64e0b438d268a07a517203990cb65a7ca5f60)
-rw-r--r-- jstests/replsets/chaining_removal.js 1
-rw-r--r-- jstests/replsets/maxSyncSourceLagSecs.js 32
-rw-r--r-- src/mongo/shell/replsettest.js 44
3 files changed, 69 insertions, 8 deletions
diff --git a/jstests/replsets/chaining_removal.js b/jstests/replsets/chaining_removal.js
index 29b50609754..1685bef8853 100644
--- a/jstests/replsets/chaining_removal.js
+++ b/jstests/replsets/chaining_removal.js
@@ -20,6 +20,7 @@
],
});
replTest.waitForState(nodes[0], ReplSetTest.State.PRIMARY, 60 * 1000);
+ replTest.awaitNodesAgreeOnPrimary();
var primary = replTest.getPrimary();
replTest.awaitReplication();
diff --git a/jstests/replsets/maxSyncSourceLagSecs.js b/jstests/replsets/maxSyncSourceLagSecs.js
index 0e7fe04355b..6bcec4a3f71 100644
--- a/jstests/replsets/maxSyncSourceLagSecs.js
+++ b/jstests/replsets/maxSyncSourceLagSecs.js
@@ -21,36 +21,52 @@
{"_id": 2, "host": nodes[2], priority: 0}
],
});
+ replTest.awaitNodesAgreeOnPrimary();
var master = replTest.getPrimary();
+ var slaves = replTest.liveNodes.slaves;
+ assert.commandWorked(slaves[0].getDB("admin").runCommand({replSetSyncFrom: master.name}));
+ assert.commandWorked(slaves[1].getDB("admin").runCommand({replSetSyncFrom: master.name}));
master.getDB("foo").bar.save({a: 1});
replTest.awaitReplication();
- var slaves = replTest.liveNodes.slaves;
// need to put at least maxSyncSourceLagSecs b/w first op and subsequent ops
// so that the shouldChangeSyncSource logic goes into effect
sleep(4000);
jsTestLog("Setting sync target of slave 2 to slave 1");
- assert.commandWorked(slaves[1].getDB("admin").runCommand({replSetSyncFrom: slaves[0].name}));
assert.soon(function() {
- var res = slaves[1].getDB("admin").runCommand({"replSetGetStatus": 1});
- return res.syncingTo === slaves[0].name;
- }, "sync target not changed to other slave");
+ // We do a write each time and have this in a try...catch block due to the fallout of
+ // SERVER-24114. If that timeout occurs, then we search for another sync source, however we
+ // will not find one unless more writes have come in. Additionally, it is possible that
+ // slaves[1] will switch to sync from slaves[0] after slaves[1] replicates a write from
+ // the primary but before slaves[0] replicates it. slaves[1] will then have to roll back
+ // which would cause a network error.
+ try {
+ slaves[1].getDB("admin").runCommand({replSetSyncFrom: slaves[0].name});
+ var res = slaves[1].getDB("admin").runCommand({"replSetGetStatus": 1});
+ master.getDB("foo").bar.insert({a: 1});
+ return res.syncingTo === slaves[0].name;
+ } catch (e) {
+ print("Exception in assert.soon, retrying: " + e);
+ return false;
+ }
+ }, "sync target not changed to other slave", 100 * 1000, 2 * 1000);
printjson(replTest.status());
jsTestLog("Lock slave 1 and add some docs. Force sync target for slave 2 to change to primary");
assert.commandWorked(slaves[0].getDB("admin").runCommand({fsync: 1, lock: 1}));
- master.getDB("foo").bar.save({a: 2});
assert.soon(function() {
+ master.getDB("foo").bar.insert({a: 2});
var res = slaves[1].getDB("admin").runCommand({"replSetGetStatus": 1});
return res.syncingTo === master.name;
- }, "sync target not changed back to primary");
+ }, "sync target not changed back to primary", 100 * 1000, 2 * 1000);
printjson(replTest.status());
assert.soon(function() {
- return (slaves[1].getDB("foo").bar.count() === 2);
+ return (slaves[1].getDB("foo").bar.count({a: 1}) > 0 &&
+ slaves[1].getDB("foo").bar.count({a: 2}) > 0);
}, "slave should have caught up after syncing to primary.");
assert.commandWorked(slaves[0].getDB("admin").fsyncUnlock());
diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js
index 71e6ca89336..8e6d5a05525 100644
--- a/src/mongo/shell/replsettest.js
+++ b/src/mongo/shell/replsettest.js
@@ -422,6 +422,50 @@ var ReplSetTest = function(opts) {
};
/**
+ * Blocks until all nodes agree on who the primary is.
+ */
+ this.awaitNodesAgreeOnPrimary = function(timeout) {  // timeout is forwarded to assert.soon (presumably milliseconds — confirm)
+ timeout = timeout || 60000;  // defaults to 60000 when omitted; any falsy value (including 0) also gets the default
+
+ assert.soon(function() {  // predicate: true only when every node reports exactly one primary and all report the same one
+ try {
+ var primary = -1;  // members[] index of the primary seen by the nodes checked so far; -1 means none yet
+
+ for (var i = 0; i < self.nodes.length; i++) {  // ask each node in the set for its own view
+ var replSetGetStatus =
+ self.nodes[i].getDB("admin").runCommand({replSetGetStatus: 1});
+ var nodesPrimary = -1;  // members[] index of the primary as reported by node i
+ for (var j = 0; j < replSetGetStatus.members.length; j++) {
+ if (replSetGetStatus.members[j].state === ReplSetTest.State.PRIMARY) {
+ // Node sees two primaries.
+ if (nodesPrimary !== -1) {
+ return false;
+ }
+ nodesPrimary = j;
+ }
+ }
+ // Node doesn't see a primary.
+ if (nodesPrimary < 0) {
+ return false;
+ }
+
+ if (primary < 0) {
+ // If we haven't seen a primary yet, set it to this.
+ primary = nodesPrimary;
+ } else if (primary !== nodesPrimary) {  // compared by members[] index; presumably every node lists members in the same order — verify
+ return false;
+ }
+ }
+
+ return true;
+ } catch (e) {  // any exception while polling is printed and reported as "not agreed yet"
+ print("caught exception " + e);
+ return false;
+ }
+ }, "Awaiting nodes to agree on primary", timeout);
+ };
+
+ /**
* Blocking call, which will wait for a primary to be elected for some pre-defined timeout and
* if primary is available will return a connection to it. Otherwise throws an exception.
*/