author     Ramon Fernandez <ramon@mongodb.com>    2017-04-20 17:43:39 -0400
committer  Ramon Fernandez <ramon@mongodb.com>    2017-04-20 17:43:39 -0400
commit     94a762ba470c21db4db14afb692c1b7dc5f4c69b (patch)
tree       64d34c2be2d14dbba211f3f39670eb3adfccfe23
parent     8d35bbbf1c1c08969eb40fee5144ff01b503273e (diff)
download   mongo-94a762ba470c21db4db14afb692c1b7dc5f4c69b.tar.gz
Revert "SERVER-26360 Node should be able to leave RECOVERING after going too stale"
This reverts commit 31a2a5bcec525c9dc62cfdd06d126673f507c8df.
-rw-r--r--  jstests/replsets/too_stale_secondary.js  140
-rw-r--r--  jstests/replsets/toostale.js             126
-rw-r--r--  src/mongo/db/repl/bgsync.cpp              24
-rw-r--r--  src/mongo/db/repl/bgsync.h                42
4 files changed, 139 insertions, 193 deletions
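
Note: RECOVERING (state 3) is how maintenance mode surfaces in replSetGetStatus, and the bgsync.cpp change below is about who turns maintenance mode back off. As a rough illustration (not part of this commit; the host and port are placeholders), the same transition can be driven by hand from the mongo shell:

// Illustrative sketch only: observe and toggle the RECOVERING (maintenance) state.
// "localhost:27017" is an assumed address, not taken from this commit.
var conn = new Mongo("localhost:27017");
var admin = conn.getDB("admin");

// replSetGetStatus reports numeric member states; 3 == RECOVERING.
printjson(admin.runCommand({replSetGetStatus: 1}).myState);

// On a secondary, the replSetMaintenance command is the operator-facing counterpart
// of the internal setMaintenanceMode(true/false) calls edited in bgsync.cpp below.
assert.commandWorked(admin.runCommand({replSetMaintenance: true}));   // member reports RECOVERING
assert.commandWorked(admin.runCommand({replSetMaintenance: false}));  // member returns to SECONDARY
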
diff --git a/jstests/replsets/too_stale_secondary.js b/jstests/replsets/too_stale_secondary.js
deleted file mode 100644
index 369662e5f16..00000000000
--- a/jstests/replsets/too_stale_secondary.js
+++ /dev/null
@@ -1,140 +0,0 @@
-/**
- * This test ensures that a secondary that has gone "too stale" (i.e. cannot find another node with
- * a common oplog point) will transition to RECOVERING state, stay in RECOVERING after restart, and
- * transition back to SECONDARY once it finds a sync source with a common oplog point.
- *
- * Note: This test requires persistence in order for a restarted node with a stale oplog to stay in
- * the RECOVERING state. A restarted node with an ephemeral storage engine will not have an oplog
- * upon restart, so will immediately resync.
- *
- * @tags: [requires_persistence]
- *
- * Replica Set Setup:
- *
- * Node 0 (PRIMARY) : Small Oplog
- * Node 1 (SECONDARY) : Large Oplog
- * Node 2 (SECONDARY) : Small Oplog
- *
- * 1: Insert one document on the primary (Node 0) and ensure it is replicated.
- * 2: Stop node 2.
- * 3: Wait until Node 2 is down.
- * 4: Overflow the primary's oplog.
- * 5: Stop Node 1 and restart Node 2.
- * 6: Wait for Node 2 to transition to RECOVERING (it should be too stale).
- * 7: Stop and restart Node 2.
- * 8: Wait for Node 2 to transition to RECOVERING (its oplog should remain stale after restart).
- * 9: Restart Node 1, which should have the full oplog history.
- * 10: Wait for Node 2 to leave RECOVERING and transition to SECONDARY.
- *
- */
-
-(function() {
-    load('jstests/replsets/rslib.js');
-
-    "use strict";
-
-    function getFirstOplogEntry(conn) {
-        return conn.getDB('local').oplog.rs.find().sort({ts: 1}).limit(1)[0];
-    }
-
-    /**
-     * Overflows the oplog of a given node.
-     *
-     * To detect oplog overflow, we continuously insert large documents until we
-     * detect that the first entry of the oplog is no longer the same as when we started. This
-     * implies that the oplog attempted to grow beyond its maximum size i.e. it
-     * has overflowed/rolled over.
-     *
-     * Each document will be inserted with a writeConcern given by 'writeConcern'.
-     *
-     */
-    function overflowOplog(conn, db, writeConcern) {
-        var firstOplogEntry = getFirstOplogEntry(primary);
-        var collName = "overflow";
-
-        // Keep inserting large documents until the oplog rolls over.
-        const largeStr = new Array(32 * 1024).join('aaaaaaaa');
-        while (bsonWoCompare(getFirstOplogEntry(conn), firstOplogEntry) === 0) {
-            assert.writeOK(
-                db[collName].insert({data: largeStr}, {writeConcern: {w: writeConcern}}));
-        }
-    }
-
-    var testName = "too_stale_secondary";
-
-    var smallOplogSizeMB = 1;
-    var bigOplogSizeMB = 1000;
-
-    // Node 0 is given a small oplog so we can overflow it. Node 1's large oplog allows it to store
-    // all entries comfortably without overflowing, so that Node 2 can eventually use it as a sync
-    // source after it goes too stale.
-    var replTest = new ReplSetTest({
-        name: testName,
-        nodes: [
-            {oplogSize: smallOplogSizeMB},
-            {oplogSize: bigOplogSizeMB},
-            {oplogSize: smallOplogSizeMB}
-        ]
-    });
-
-    var nodes = replTest.startSet();
-    replTest.initiate({
-        _id: testName,
-        members: [
-            {_id: 0, host: nodes[0].host},
-            {_id: 1, host: nodes[1].host, priority: 0},
-            {_id: 2, host: nodes[2].host, priority: 0}
-        ]
-    });
-
-    var dbName = testName;
-    var collName = "test";
-
-    jsTestLog("Wait for Node 0 to become the primary.");
-    replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY);
-
-    var primary = replTest.getPrimary();
-    var primaryTestDB = primary.getDB(dbName);
-
-    jsTestLog("1: Insert one document on the primary (Node 0) and ensure it is replicated.");
-    assert.writeOK(primaryTestDB[collName].insert({a: 1}, {writeConcern: {w: 3}}));
-
-    jsTestLog("2: Stop Node 2.");
-    replTest.stop(2);
-
-    jsTestLog("3: Wait until Node 2 is down.");
-    replTest.waitForState(replTest.nodes[2], ReplSetTest.State.DOWN);
-
-    var firstOplogEntryNode1 = getFirstOplogEntry(replTest.nodes[1]);
-
-    jsTestLog("4: Overflow the primary's oplog.");
-    overflowOplog(primary, primaryTestDB, 2);
-
-    // Make sure that Node 1's oplog didn't overflow.
-    assert.eq(firstOplogEntryNode1,
-              getFirstOplogEntry(replTest.nodes[1]),
-              "Node 1's oplog overflowed unexpectedly.");
-
-    jsTestLog("5: Stop Node 1 and restart Node 2.");
-    replTest.stop(1);
-    replTest.restart(2);
-
-    jsTestLog("6: Wait for Node 2 to transition to RECOVERING (it should be too stale).");
-    replTest.waitForState(replTest.nodes[2], ReplSetTest.State.RECOVERING);
-
-    jsTestLog("7: Stop and restart Node 2.");
-    replTest.stop(2);
-    replTest.restart(2);
-
-    jsTestLog(
-        "8: Wait for Node 2 to transition to RECOVERING (its oplog should remain stale after restart)");
-    replTest.waitForState(replTest.nodes[2], ReplSetTest.State.RECOVERING);
-
-    jsTestLog("9: Restart Node 1, which should have the full oplog history.");
-    replTest.restart(1);
-
-    jsTestLog("10: Wait for Node 2 to leave RECOVERING and transition to SECONDARY.");
-    replTest.waitForState(replTest.nodes[2], ReplSetTest.State.SECONDARY);
-
-    replTest.stopSet();
-}());
\ No newline at end of file
diff --git a/jstests/replsets/toostale.js b/jstests/replsets/toostale.js
new file mode 100644
index 00000000000..4f1a1057757
--- /dev/null
+++ b/jstests/replsets/toostale.js
@@ -0,0 +1,126 @@
+// This tests that:
+// * stale members get into state 3 (recovering)
+// * they stay in state 3 after restarting
+// * they can recover and go into state 2 if someone less up-to-date becomes primary
+//
+// This test requires persistence in order for a restarted node with a stale oplog to stay in the
+// RECOVERING state. A restarted node with an ephemeral storage engine will not have an oplog upon
+// restart, so will immediately resync.
+// @tags: [requires_persistence]
+
+/**
+ * 1: initial insert
+ * 2: initial sync
+ * 3: blind s2
+ * 4: overflow oplog
+ * 5: unblind s2
+ * 6: check s2.state == 3
+ * 7: restart s2
+ * 8: check s2.state == 3
+ */
+
+var w = 0;
+var wait = function(f) {
+    w++;
+    var n = 0;
+    while (!f()) {
+        if (n % 4 == 0)
+            print("toostale.js waiting " + w);
+        if (++n == 4) {
+            print("" + f);
+        }
+        assert(n < 200, 'tried 200 times, giving up');
+        sleep(1000);
+    }
+};
+
+var reconnect = function(a) {
+    wait(function() {
+        try {
+            a.bar.stats();
+            return true;
+        } catch (e) {
+            print(e);
+            return false;
+        }
+    });
+};
+
+var name = "toostale";
+var replTest = new ReplSetTest({name: name, nodes: 3, oplogSize: 5});
+var host = getHostName();
+
+var nodes = replTest.startSet();
+replTest.initiate({
+    _id: name,
+    members: [
+        {_id: 0, host: host + ":" + replTest.ports[0], priority: 2},
+        {_id: 1, host: host + ":" + replTest.ports[1], arbiterOnly: true},
+        {_id: 2, host: host + ":" + replTest.ports[2], priority: 0}
+    ]
+});
+var master = replTest.getPrimary();
+var mdb = master.getDB("foo");
+
+print("1: initial insert");
+mdb.foo.save({a: 1000});
+
+print("2: initial sync");
+replTest.awaitReplication();
+
+print("3: stop s2");
+replTest.stop(2);
+print("waiting until the master knows the slave is blind");
+assert.soon(function() {
+    return master.getDB("admin").runCommand({replSetGetStatus: 1}).members[2].health == 0;
+});
+print("okay");
+
+print("4: overflow oplog");
+reconnect(master.getDB("local"));
+var count = master.getDB("local").oplog.rs.find().itcount();
+var prevCount = -1;
+while (count > prevCount) {
+    print("inserting 1000");
+    var bulk = mdb.bar.initializeUnorderedBulkOp();
+    for (var i = 0; i < 1000; i++) {
+        bulk.insert({x: i, date: new Date(), str: "safkaldmfaksndfkjansfdjanfjkafa"});
+    }
+    assert.writeOK(bulk.execute());
+
+    prevCount = count;
+    replTest.awaitReplication();
+    count = master.getDB("local").oplog.rs.find().itcount();
+    print("count: " + count + " prev: " + prevCount);
+}
+
+print("5: restart s2");
+replTest.restart(2);
+print("waiting until the master knows the slave is not blind");
+assert.soon(function() {
+    return master.getDB("admin").runCommand({replSetGetStatus: 1}).members[2].health != 0;
+});
+print("okay");
+
+print("6: check s2.state == 3");
+var goStale = function() {
+    wait(function() {
+        var status = master.getDB("admin").runCommand({replSetGetStatus: 1});
+        printjson(status);
+        return status.members[2].state == 3;
+    });
+};
+goStale();
+
+print("7: restart s2");
+replTest.stop(2);
+replTest.restart(2);
+
+print("8: check s2.state == 3");
+assert.soon(function() {
+    var status = master.getDB("admin").runCommand({replSetGetStatus: 1});
+    printjson(status);
+    return status.members && status.members[2].state == 3;
+});
+
+replTest.stop(0);
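
Note: both tests turn on forcing the primary's capped oplog to wrap. Distilled from the deleted too_stale_secondary.js above, a minimal sketch of that idiom (assumes an existing shell connection conn and a writable database db; the document size is illustrative):

// Sketch: the oplog has rolled over once its earliest entry changes.
function firstOplogEntry(conn) {
    return conn.getDB("local").oplog.rs.find().sort({ts: 1}).limit(1)[0];
}

function overflowOplog(conn, db) {
    var start = firstOplogEntry(conn);
    var largeStr = new Array(32 * 1024).join("aaaaaaaa");  // roughly 256KB per document
    while (bsonWoCompare(firstOplogEntry(conn), start) === 0) {
        assert.writeOK(db.overflow.insert({data: largeStr}));
    }
}
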
diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp
index 41781d2fcc9..308e6c7ecce 100644
--- a/src/mongo/db/repl/bgsync.cpp
+++ b/src/mongo/db/repl/bgsync.cpp
@@ -315,20 +315,10 @@ void BackgroundSync::_produce(OperationContext* opCtx) {
         return;
     }
 
-    // We only need to mark ourselves as too stale once.
-    if (_tooStale) {
-        return;
-    }
-
-    // Mark yourself as too stale.
-    _tooStale = true;
-
     error() << "too stale to catch up -- entering maintenance mode";
     log() << "Our newest OpTime : " << lastOpTimeFetched;
     log() << "Earliest OpTime available is " << syncSourceResp.earliestOpTimeSeen;
     log() << "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember";
-
-    // Activate maintenance mode and transition to RECOVERING.
     auto status = _replCoord->setMaintenanceMode(true);
     if (!status.isOK()) {
         warning() << "Failed to transition into maintenance mode: " << status;
@@ -363,20 +353,6 @@ void BackgroundSync::_produce(OperationContext* opCtx) {
         return;
     }
 
-    // If we find a good sync source after having gone too stale, disable maintenance mode so we can
-    // transition to SECONDARY.
-    if (_tooStale) {
-
-        _tooStale = false;
-
-        log() << "No longer too stale. Able to sync from " << _syncSourceHost;
-
-        auto status = _replCoord->setMaintenanceMode(false);
-        if (!status.isOK()) {
-            warning() << "Failed to leave maintenance mode: " << status;
-        }
-    }
-
     long long lastHashFetched;
     {
         stdx::lock_guard<stdx::mutex> lock(_mutex);
diff --git a/src/mongo/db/repl/bgsync.h b/src/mongo/db/repl/bgsync.h
index 6d068967723..ead036c79d2 100644
--- a/src/mongo/db/repl/bgsync.h
+++ b/src/mongo/db/repl/bgsync.h
@@ -174,47 +174,31 @@ private:
     // A pointer to the replication coordinator external state.
     ReplicationCoordinatorExternalState* _replicationCoordinatorExternalState;
 
-    /**
-     * All member variables are labeled with one of the following codes indicating the
-     * synchronization rules for accessing them:
-     *
-     * (PR) Completely private to BackgroundSync. Can be read or written to from within the main
-     *      BackgroundSync thread without synchronization. Shouldn't be accessed outside of this
-     *      thread.
-     *
-     * (S) Self-synchronizing; access in any way from any context.
-     *
-     * (M) Reads and writes guarded by _mutex
-     *
-     */
+    // _mutex protects all of the class variables declared below.
+    //
+    // Never hold bgsync mutex when trying to acquire the ReplicationCoordinator mutex.
+    mutable stdx::mutex _mutex;
 
-    // Protects member data of BackgroundSync.
-    // Never hold the BackgroundSync mutex when trying to acquire the ReplicationCoordinator mutex.
-    mutable stdx::mutex _mutex;  // (S)
+    OpTime _lastOpTimeFetched;
 
-    OpTime _lastOpTimeFetched;  // (M)
-
-    // lastFetchedHash is used to match ops to determine if we need to rollback, when a secondary.
-    long long _lastFetchedHash = 0LL;  // (M)
+    // lastFetchedHash is used to match ops to determine if we need to rollback, when
+    // a secondary.
+    long long _lastFetchedHash = 0LL;
 
     // Thread running producerThread().
-    std::unique_ptr<stdx::thread> _producerThread;  // (M)
+    std::unique_ptr<stdx::thread> _producerThread;
 
     // Set to true if shutdown() has been called.
-    bool _inShutdown = false;  // (M)
-
-    // Flag that marks whether a node's oplog has no common point with any
-    // potential sync sources.
-    bool _tooStale = false;  // (PR)
+    bool _inShutdown = false;
 
-    ProducerState _state = ProducerState::Starting;  // (M)
+    ProducerState _state = ProducerState::Starting;
 
-    HostAndPort _syncSourceHost;  // (M)
+    HostAndPort _syncSourceHost;
 
     // Current sync source resolver validating sync source candidates.
     // Pointer may be read on any thread that locks _mutex or unlocked on the BGSync thread. It can
     // only be written to by the BGSync thread while holding _mutex.
-    std::unique_ptr<SyncSourceResolver> _syncSourceResolver;  // (M)
+    std::unique_ptr<SyncSourceResolver> _syncSourceResolver;
 
     // Current oplog fetcher tailing the oplog on the sync source.
     std::unique_ptr<OplogFetcher> _oplogFetcher;
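
Note: with the _tooStale bookkeeping gone, nothing in bgsync leaves maintenance mode on its own, so a too-stale member stays in RECOVERING; that is what toostale.js asserts, and the automatic exit was what the deleted test's step 10 covered. A hedged shell helper for watching such member-state transitions, modeled on the wait()/assert.soon() idiom in the tests above (awaitMemberState is hypothetical, not part of this commit):

// Hypothetical helper: poll replSetGetStatus until member `index` reports `wantState`
// (e.g. 3 == RECOVERING, 2 == SECONDARY), failing after `timeoutMS` milliseconds.
function awaitMemberState(adminDB, index, wantState, timeoutMS) {
    assert.soon(function() {
        var status = adminDB.runCommand({replSetGetStatus: 1});
        return status.ok && status.members && status.members[index].state == wantState;
    }, "member " + index + " never reached state " + wantState, timeoutMS);
}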