author     Ramon Fernandez <ramon@mongodb.com>    2017-04-20 17:43:39 -0400
committer  Ramon Fernandez <ramon@mongodb.com>    2017-04-20 17:43:39 -0400
commit     94a762ba470c21db4db14afb692c1b7dc5f4c69b (patch)
tree       64d34c2be2d14dbba211f3f39670eb3adfccfe23
parent     8d35bbbf1c1c08969eb40fee5144ff01b503273e (diff)
download   mongo-94a762ba470c21db4db14afb692c1b7dc5f4c69b.tar.gz
Revert "SERVER-26360 Node should be able to leave RECOVERING after going too stale"
This reverts commit 31a2a5bcec525c9dc62cfdd06d126673f507c8df.
-rw-r--r--  jstests/replsets/too_stale_secondary.js  140
-rw-r--r--  jstests/replsets/toostale.js              126
-rw-r--r--  src/mongo/db/repl/bgsync.cpp               24
-rw-r--r--  src/mongo/db/repl/bgsync.h                 42
4 files changed, 139 insertions, 193 deletions
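The "too stale" condition exercised by both tests in this diff is visible to operators through the replSetGetStatus command: the stale member reports state 3 (RECOVERING). A minimal shell sketch of that check follows (not part of this commit; the member index mirrors Node 2 / s2 from the tests below, and the connection is assumed to point at any reachable member):

    // Ask the replica set for member status and inspect the third member (index 2),
    // which is the node both tests deliberately let fall off the back of the oplog.
    var status = db.adminCommand({replSetGetStatus: 1});
    var member = status.members[2];
    print(member.name + ": state " + member.state + " (" + member.stateStr + ")");
    // While the member is too stale it sits in maintenance mode and reports RECOVERING.
    assert.eq(3, member.state);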
diff --git a/jstests/replsets/too_stale_secondary.js b/jstests/replsets/too_stale_secondary.js
deleted file mode 100644
index 369662e5f16..00000000000
--- a/jstests/replsets/too_stale_secondary.js
+++ /dev/null
@@ -1,140 +0,0 @@
-/**
- * This test ensures that a secondary that has gone "too stale" (i.e. cannot find another node with
- * a common oplog point) will transition to RECOVERING state, stay in RECOVERING after restart, and
- * transition back to SECONDARY once it finds a sync source with a common oplog point.
- *
- * Note: This test requires persistence in order for a restarted node with a stale oplog to stay in
- * the RECOVERING state. A restarted node with an ephemeral storage engine will not have an oplog
- * upon restart, so it will immediately resync.
- *
- * @tags: [requires_persistence]
- *
- * Replica Set Setup:
- *
- * Node 0 (PRIMARY) : Small Oplog
- * Node 1 (SECONDARY) : Large Oplog
- * Node 2 (SECONDARY) : Small Oplog
- *
- * 1: Insert one document on the primary (Node 0) and ensure it is replicated.
- * 2: Stop node 2.
- * 3: Wait until Node 2 is down.
- * 4: Overflow the primary's oplog.
- * 5: Stop Node 1 and restart Node 2.
- * 6: Wait for Node 2 to transition to RECOVERING (it should be too stale).
- * 7: Stop and restart Node 2.
- * 8: Wait for Node 2 to transition to RECOVERING (its oplog should remain stale after restart).
- * 9: Restart Node 1, which should have the full oplog history.
- * 10: Wait for Node 2 to leave RECOVERING and transition to SECONDARY.
- *
- */
-
-(function() {
- load('jstests/replsets/rslib.js');
-
- "use strict";
-
- function getFirstOplogEntry(conn) {
- return conn.getDB('local').oplog.rs.find().sort({ts: 1}).limit(1)[0];
- }
-
- /**
- * Overflows the oplog of a given node.
- *
- * To detect oplog overflow, we continuously insert large documents until we
- * detect that the first entry of the oplog is no longer the same as when we started. This
- * implies that the oplog attempted to grow beyond its maximum size, i.e. it
- * has overflowed/rolled over.
- *
- * Each document will be inserted with a writeConcern given by 'writeConcern'.
- *
- */
- function overflowOplog(conn, db, writeConcern) {
- var firstOplogEntry = getFirstOplogEntry(conn);
- var collName = "overflow";
-
- // Keep inserting large documents until the oplog rolls over.
- const largeStr = new Array(32 * 1024).join('aaaaaaaa');
- while (bsonWoCompare(getFirstOplogEntry(conn), firstOplogEntry) === 0) {
- assert.writeOK(
- db[collName].insert({data: largeStr}, {writeConcern: {w: writeConcern}}));
- }
- }
-
- var testName = "too_stale_secondary";
-
- var smallOplogSizeMB = 1;
- var bigOplogSizeMB = 1000;
-
- // Node 0 is given a small oplog so we can overflow it. Node 1's large oplog allows it to store
- // all entries comfortably without overflowing, so that Node 2 can eventually use it as a sync
- // source after it goes too stale.
- var replTest = new ReplSetTest({
- name: testName,
- nodes: [
- {oplogSize: smallOplogSizeMB},
- {oplogSize: bigOplogSizeMB},
- {oplogSize: smallOplogSizeMB}
- ]
- });
-
- var nodes = replTest.startSet();
- replTest.initiate({
- _id: testName,
- members: [
- {_id: 0, host: nodes[0].host},
- {_id: 1, host: nodes[1].host, priority: 0},
- {_id: 2, host: nodes[2].host, priority: 0}
- ]
- });
-
- var dbName = testName;
- var collName = "test";
-
- jsTestLog("Wait for Node 0 to become the primary.");
- replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY);
-
- var primary = replTest.getPrimary();
- var primaryTestDB = primary.getDB(dbName);
-
- jsTestLog("1: Insert one document on the primary (Node 0) and ensure it is replicated.");
- assert.writeOK(primaryTestDB[collName].insert({a: 1}, {writeConcern: {w: 3}}));
-
- jsTestLog("2: Stop Node 2.");
- replTest.stop(2);
-
- jsTestLog("3: Wait until Node 2 is down.");
- replTest.waitForState(replTest.nodes[2], ReplSetTest.State.DOWN);
-
- var firstOplogEntryNode1 = getFirstOplogEntry(replTest.nodes[1]);
-
- jsTestLog("4: Overflow the primary's oplog.");
- overflowOplog(primary, primaryTestDB, 2);
-
- // Make sure that Node 1's oplog didn't overflow.
- assert.eq(firstOplogEntryNode1,
- getFirstOplogEntry(replTest.nodes[1]),
- "Node 1's oplog overflowed unexpectedly.");
-
- jsTestLog("5: Stop Node 1 and restart Node 2.");
- replTest.stop(1);
- replTest.restart(2);
-
- jsTestLog("6: Wait for Node 2 to transition to RECOVERING (it should be too stale).");
- replTest.waitForState(replTest.nodes[2], ReplSetTest.State.RECOVERING);
-
- jsTestLog("7: Stop and restart Node 2.");
- replTest.stop(2);
- replTest.restart(2);
-
- jsTestLog(
- "8: Wait for Node 2 to transition to RECOVERING (its oplog should remain stale after restart)");
- replTest.waitForState(replTest.nodes[2], ReplSetTest.State.RECOVERING);
-
- jsTestLog("9: Restart Node 1, which should have the full oplog history.");
- replTest.restart(1);
-
- jsTestLog("10: Wait for Node 2 to leave RECOVERING and transition to SECONDARY.");
- replTest.waitForState(replTest.nodes[2], ReplSetTest.State.SECONDARY);
-
- replTest.stopSet();
-}());
\ No newline at end of file
diff --git a/jstests/replsets/toostale.js b/jstests/replsets/toostale.js
new file mode 100644
index 00000000000..4f1a1057757
--- /dev/null
+++ b/jstests/replsets/toostale.js
@@ -0,0 +1,126 @@
+// This tests that:
+// * stale members get into state 3 (recovering)
+// * they stay in state 3 after restarting
+// * they can recover and go into state 2 if someone less up-to-date becomes primary
+//
+// This test requires persistence in order for a restarted node with a stale oplog to stay in the
+// RECOVERING state. A restarted node with an ephemeral storage engine will not have an oplog upon
+// restart, so it will immediately resync.
+// @tags: [requires_persistence]
+
+/**
+ * 1: initial insert
+ * 2: initial sync
+ * 3: blind s2
+ * 4: overflow oplog
+ * 5: unblind s2
+ * 6: check s2.state == 3
+ * 7: restart s2
+ * 8: check s2.state == 3
+ */
+
+var w = 0;
+var wait = function(f) {
+ w++;
+ var n = 0;
+ while (!f()) {
+ if (n % 4 == 0)
+ print("toostale.js waiting " + w);
+ if (++n == 4) {
+ print("" + f);
+ }
+ assert(n < 200, 'tried 200 times, giving up');
+ sleep(1000);
+ }
+};
+
+var reconnect = function(a) {
+ wait(function() {
+ try {
+ a.bar.stats();
+ return true;
+ } catch (e) {
+ print(e);
+ return false;
+ }
+ });
+};
+
+var name = "toostale";
+var replTest = new ReplSetTest({name: name, nodes: 3, oplogSize: 5});
+var host = getHostName();
+
+var nodes = replTest.startSet();
+replTest.initiate({
+ _id: name,
+ members: [
+ {_id: 0, host: host + ":" + replTest.ports[0], priority: 2},
+ {_id: 1, host: host + ":" + replTest.ports[1], arbiterOnly: true},
+ {_id: 2, host: host + ":" + replTest.ports[2], priority: 0}
+ ]
+});
+var master = replTest.getPrimary();
+var mdb = master.getDB("foo");
+
+print("1: initial insert");
+mdb.foo.save({a: 1000});
+
+print("2: initial sync");
+replTest.awaitReplication();
+
+print("3: stop s2");
+replTest.stop(2);
+print("waiting until the master knows the slave is blind");
+assert.soon(function() {
+ return master.getDB("admin").runCommand({replSetGetStatus: 1}).members[2].health == 0;
+});
+print("okay");
+
+print("4: overflow oplog");
+reconnect(master.getDB("local"));
+var count = master.getDB("local").oplog.rs.find().itcount();
+var prevCount = -1;
+while (count > prevCount) {
+ print("inserting 1000");
+ var bulk = mdb.bar.initializeUnorderedBulkOp();
+ for (var i = 0; i < 1000; i++) {
+ bulk.insert({x: i, date: new Date(), str: "safkaldmfaksndfkjansfdjanfjkafa"});
+ }
+ assert.writeOK(bulk.execute());
+
+ prevCount = count;
+ replTest.awaitReplication();
+ count = master.getDB("local").oplog.rs.find().itcount();
+ print("count: " + count + " prev: " + prevCount);
+}
+
+print("5: restart s2");
+replTest.restart(2);
+print("waiting until the master knows the slave is not blind");
+assert.soon(function() {
+ return master.getDB("admin").runCommand({replSetGetStatus: 1}).members[2].health != 0;
+});
+print("okay");
+
+print("6: check s2.state == 3");
+var goStale = function() {
+ wait(function() {
+ var status = master.getDB("admin").runCommand({replSetGetStatus: 1});
+ printjson(status);
+ return status.members[2].state == 3;
+ });
+};
+goStale();
+
+print("7: restart s2");
+replTest.stop(2);
+replTest.restart(2);
+
+print("8: check s2.state == 3");
+assert.soon(function() {
+ var status = master.getDB("admin").runCommand({replSetGetStatus: 1});
+ printjson(status);
+ return status.members && status.members[2].state == 3;
+});
+
+replTest.stop(0);
diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp
index 41781d2fcc9..308e6c7ecce 100644
--- a/src/mongo/db/repl/bgsync.cpp
+++ b/src/mongo/db/repl/bgsync.cpp
@@ -315,20 +315,10 @@ void BackgroundSync::_produce(OperationContext* opCtx) {
return;
}
- // We only need to mark ourselves as too stale once.
- if (_tooStale) {
- return;
- }
-
- // Mark yourself as too stale.
- _tooStale = true;
-
error() << "too stale to catch up -- entering maintenance mode";
log() << "Our newest OpTime : " << lastOpTimeFetched;
log() << "Earliest OpTime available is " << syncSourceResp.earliestOpTimeSeen;
log() << "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember";
-
- // Activate maintenance mode and transition to RECOVERING.
auto status = _replCoord->setMaintenanceMode(true);
if (!status.isOK()) {
warning() << "Failed to transition into maintenance mode: " << status;
@@ -363,20 +353,6 @@ void BackgroundSync::_produce(OperationContext* opCtx) {
return;
}
- // If we find a good sync source after having gone too stale, disable maintenance mode so we can
- // transition to SECONDARY.
- if (_tooStale) {
-
- _tooStale = false;
-
- log() << "No longer too stale. Able to sync from " << _syncSourceHost;
-
- auto status = _replCoord->setMaintenanceMode(false);
- if (!status.isOK()) {
- warning() << "Failed to leave maintenance mode: " << status;
- }
- }
-
long long lastHashFetched;
{
stdx::lock_guard<stdx::mutex> lock(_mutex);
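The hunk above removes the code path in bgsync that called setMaintenanceMode(false) once a usable sync source was found again, so with this revert a too-stale node stays in RECOVERING until an operator intervenes. The shell-level counterpart of that removed call is the replSetMaintenance admin command; a hedged sketch is below, noting that for a member whose oplog is genuinely too stale the dochub page referenced in the log message above prescribes a full resync instead:

    // Run against the stuck secondary itself. This is the manual analogue of the removed
    // setMaintenanceMode(false) call; it does not help if the member's oplog is still too
    // stale, in which case the node must be resynced (e.g. restarted with an empty dbpath
    // so it performs an initial sync).
    assert.commandWorked(db.adminCommand({replSetMaintenance: false}));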
diff --git a/src/mongo/db/repl/bgsync.h b/src/mongo/db/repl/bgsync.h
index 6d068967723..ead036c79d2 100644
--- a/src/mongo/db/repl/bgsync.h
+++ b/src/mongo/db/repl/bgsync.h
@@ -174,47 +174,31 @@ private:
// A pointer to the replication coordinator external state.
ReplicationCoordinatorExternalState* _replicationCoordinatorExternalState;
- /**
- * All member variables are labeled with one of the following codes indicating the
- * synchronization rules for accessing them:
- *
- * (PR) Completely private to BackgroundSync. Can be read or written to from within the main
- * BackgroundSync thread without synchronization. Shouldn't be accessed outside of this
- * thread.
- *
- * (S) Self-synchronizing; access in any way from any context.
- *
- * (M) Reads and writes guarded by _mutex
- *
- */
+ // _mutex protects all of the class variables declared below.
+ //
+ // Never hold bgsync mutex when trying to acquire the ReplicationCoordinator mutex.
+ mutable stdx::mutex _mutex;
- // Protects member data of BackgroundSync.
- // Never hold the BackgroundSync mutex when trying to acquire the ReplicationCoordinator mutex.
- mutable stdx::mutex _mutex; // (S)
+ OpTime _lastOpTimeFetched;
- OpTime _lastOpTimeFetched; // (M)
-
- // lastFetchedHash is used to match ops to determine if we need to rollback, when a secondary.
- long long _lastFetchedHash = 0LL; // (M)
+ // lastFetchedHash is used to match ops to determine if we need to rollback, when
+ // a secondary.
+ long long _lastFetchedHash = 0LL;
// Thread running producerThread().
- std::unique_ptr<stdx::thread> _producerThread; // (M)
+ std::unique_ptr<stdx::thread> _producerThread;
// Set to true if shutdown() has been called.
- bool _inShutdown = false; // (M)
-
- // Flag that marks whether a node's oplog has no common point with any
- // potential sync sources.
- bool _tooStale = false; // (PR)
+ bool _inShutdown = false;
- ProducerState _state = ProducerState::Starting; // (M)
+ ProducerState _state = ProducerState::Starting;
- HostAndPort _syncSourceHost; // (M)
+ HostAndPort _syncSourceHost;
// Current sync source resolver validating sync source candidates.
// Pointer may be read on any thread that locks _mutex or unlocked on the BGSync thread. It can
// only be written to by the BGSync thread while holding _mutex.
- std::unique_ptr<SyncSourceResolver> _syncSourceResolver; // (M)
+ std::unique_ptr<SyncSourceResolver> _syncSourceResolver;
// Current oplog fetcher tailing the oplog on the sync source.
std::unique_ptr<OplogFetcher> _oplogFetcher;