author     William Schultz <william.schultz@mongodb.com>   2018-07-06 10:41:17 -0400
committer  William Schultz <william.schultz@mongodb.com>   2018-10-01 18:16:53 -0400
commit     2524d2aa24d137b6fa4e48c5f199eefa6e1cadaa (patch)
tree       4087f8cc72ffd93981debd5d45f0d28eb6f90c02
parent     a70a8ce8cdc5734353acdd577956e58872169c64 (diff)
download   mongo-2524d2aa24d137b6fa4e48c5f199eefa6e1cadaa.tar.gz
SERVER-35200 Speed up steady state oplog fetching failure detection
This patch attempts to improve how quickly a secondary node in steady state replication detects that its current sync source has failed or become partitioned, and tries to select a new sync source. The speed of this process can significantly affect how long it takes for a new primary to begin committing majority writes after the previous primary has failed or become partitioned from the replica set. This patch improves on the old behavior by reducing the number of fetcher restarts to 1, and by reducing the oplog 'find' request timeout used on a restart.

(cherry picked from commit 2c6614c3bd716fb9ccaad1f7c68e9eb490ed1df6)
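As a rough, back-of-the-envelope sketch of the intended effect (not part of the patch), assume the defaults visible in the diffs below: a 60 second initial 'find' timeout, a 2 second retried 'find' timeout, a 5 second network buffer added to each, and an old restart limit of 3 versus the new steady-state limit of 1. The helper name here is hypothetical:

// Worst-case time for the fetcher to give up on an unreachable sync source:
// one initial attempt plus up to `maxRestarts` retried attempts, each of
// which must hit its network-level timeout (find maxTimeMS + buffer) before
// the fetcher shuts down and sync source selection can run again.
function worstCaseFetcherFailureMillis(maxRestarts, initialFindMS, retriedFindMS, bufferMS) {
    return (initialFindMS + bufferMS) + maxRestarts * (retriedFindMS + bufferMS);
}
worstCaseFetcherFailureMillis(3, 60000, 60000, 5000);  // old: 260,000 ms
worstCaseFetcherFailureMillis(1, 60000, 2000, 5000);   // new:  72,000 ms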
-rw-r--r--  jstests/core/set_param1.js                                          66
-rw-r--r--  src/mongo/db/repl/bgsync.cpp                                         2
-rw-r--r--  src/mongo/db/repl/oplog_fetcher.cpp                                 24
-rw-r--r--  src/mongo/db/repl/oplog_fetcher.h                                   15
-rw-r--r--  src/mongo/db/repl/oplog_fetcher_test.cpp                            98
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state.h          10
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_impl.cpp   66
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_impl.h      3
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_mock.cpp    8
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_mock.h      3
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp                   3
11 files changed, 250 insertions(+), 48 deletions(-)
diff --git a/jstests/core/set_param1.js b/jstests/core/set_param1.js
index 51b13ae87cc..4faa312808b 100644
--- a/jstests/core/set_param1.js
+++ b/jstests/core/set_param1.js
@@ -115,29 +115,59 @@ assert.commandFailed(
assert.commandWorked(
db.adminCommand({"setParameter": 1, logComponentVerbosity: old.logComponentVerbosity}));
-//
-// oplogFetcherMaxFetcherRestarts
-//
-
var isMongos = (db.isMaster().msg === 'isdbgrid');
if (!isMongos) {
- var origRestarts =
- assert.commandWorked(db.adminCommand({getParameter: 1, oplogFetcherMaxFetcherRestarts: 1}))
- .oplogFetcherMaxFetcherRestarts;
- assert.gte(
- origRestarts, 0, 'default value of oplogFetcherMaxFetcherRestarts cannot be negative');
+ //
+ // oplogFetcherSteadyStateMaxFetcherRestarts
+ //
+ var origRestarts = assert
+ .commandWorked(db.adminCommand(
+ {getParameter: 1, oplogFetcherSteadyStateMaxFetcherRestarts: 1}))
+ .oplogFetcherSteadyStateMaxFetcherRestarts;
+ assert.gte(origRestarts,
+ 0,
+ 'default value of oplogFetcherSteadyStateMaxFetcherRestarts cannot be negative');
assert.commandFailedWithCode(
- db.adminCommand({setParameter: 1, oplogFetcherMaxFetcherRestarts: -1}),
+ db.adminCommand({setParameter: 1, oplogFetcherSteadyStateMaxFetcherRestarts: -1}),
ErrorCodes.BadValue,
- 'server should reject negative values for oplogFetcherMaxFetcherRestarts');
- assert.commandWorked(db.adminCommand({setParameter: 1, oplogFetcherMaxFetcherRestarts: 0}));
+ 'server should reject negative values for oplogFetcherSteadyStateMaxFetcherRestarts');
assert.commandWorked(
- db.adminCommand({setParameter: 1, oplogFetcherMaxFetcherRestarts: origRestarts + 20}));
- assert.eq(
- origRestarts + 20,
- assert.commandWorked(db.adminCommand({getParameter: 1, oplogFetcherMaxFetcherRestarts: 1}))
- .oplogFetcherMaxFetcherRestarts);
+ db.adminCommand({setParameter: 1, oplogFetcherSteadyStateMaxFetcherRestarts: 0}));
+ assert.commandWorked(db.adminCommand(
+ {setParameter: 1, oplogFetcherSteadyStateMaxFetcherRestarts: origRestarts + 20}));
+ assert.eq(origRestarts + 20,
+ assert
+ .commandWorked(db.adminCommand(
+ {getParameter: 1, oplogFetcherSteadyStateMaxFetcherRestarts: 1}))
+ .oplogFetcherSteadyStateMaxFetcherRestarts);
// Restore original value.
+ assert.commandWorked(db.adminCommand(
+ {setParameter: 1, oplogFetcherSteadyStateMaxFetcherRestarts: origRestarts}));
+
+ //
+ // oplogFetcherInitialSyncMaxFetcherRestarts
+ //
+ origRestarts = assert
+ .commandWorked(db.adminCommand(
+ {getParameter: 1, oplogFetcherInitialSyncMaxFetcherRestarts: 1}))
+ .oplogFetcherInitialSyncMaxFetcherRestarts;
+ assert.gte(origRestarts,
+ 0,
+ 'default value of oplogFetcherInitialSyncMaxFetcherRestarts cannot be negative');
+ assert.commandFailedWithCode(
+ db.adminCommand({setParameter: 1, oplogFetcherInitialSyncMaxFetcherRestarts: -1}),
+ ErrorCodes.BadValue,
+ 'server should reject negative values for oplogFetcherInitialSyncMaxFetcherRestarts');
assert.commandWorked(
- db.adminCommand({setParameter: 1, oplogFetcherMaxFetcherRestarts: origRestarts}));
+ db.adminCommand({setParameter: 1, oplogFetcherInitialSyncMaxFetcherRestarts: 0}));
+ assert.commandWorked(db.adminCommand(
+ {setParameter: 1, oplogFetcherInitialSyncMaxFetcherRestarts: origRestarts + 20}));
+ assert.eq(origRestarts + 20,
+ assert
+ .commandWorked(db.adminCommand(
+ {getParameter: 1, oplogFetcherInitialSyncMaxFetcherRestarts: 1}))
+ .oplogFetcherInitialSyncMaxFetcherRestarts);
+ // Restore original value.
+ assert.commandWorked(db.adminCommand(
+ {setParameter: 1, oplogFetcherInitialSyncMaxFetcherRestarts: origRestarts}));
}
diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp
index fbbf1c56efd..ac49485ede0 100644
--- a/src/mongo/db/repl/bgsync.cpp
+++ b/src/mongo/db/repl/bgsync.cpp
@@ -427,7 +427,7 @@ void BackgroundSync::_produce() {
source,
NamespaceString(rsOplogName),
_replCoord->getConfig(),
- _replicationCoordinatorExternalState->getOplogFetcherMaxFetcherRestarts(),
+ _replicationCoordinatorExternalState->getOplogFetcherSteadyStateMaxFetcherRestarts(),
syncSourceResp.rbid,
true /* requireFresherSyncSource */,
&dataReplicatorExternalState,
diff --git a/src/mongo/db/repl/oplog_fetcher.cpp b/src/mongo/db/repl/oplog_fetcher.cpp
index c9b96ab77f7..36a6e237e73 100644
--- a/src/mongo/db/repl/oplog_fetcher.cpp
+++ b/src/mongo/db/repl/oplog_fetcher.cpp
@@ -58,8 +58,15 @@ MONGO_FP_DECLARE(stopReplProducer);
namespace {
// Number of seconds for the `maxTimeMS` on the initial `find` command.
+//
+// For the initial 'find' request, we provide a generous timeout, since the sync source may
+// need a relatively long time to locate, in its oplog, the lastApplied optime sent in the
+// node's query.
MONGO_EXPORT_SERVER_PARAMETER(oplogInitialFindMaxSeconds, int, 60);
+// Number of seconds for the `maxTimeMS` on any retried `find` commands.
+MONGO_EXPORT_SERVER_PARAMETER(oplogRetriedFindMaxSeconds, int, 2);
+
// Number of milliseconds to add to the `find` and `getMore` timeouts to calculate the network
// timeout for the requests.
const Milliseconds kNetworkTimeoutBufferMS{5000};
@@ -373,7 +380,7 @@ OplogFetcher::OplogFetcher(executor::TaskExecutor* executor,
uassert(ErrorCodes::BadValue, "null onShutdownCallback function", onShutdownCallbackFn);
auto currentTerm = dataReplicatorExternalState->getCurrentTermAndLastCommittedOpTime().value;
- _fetcher = _makeFetcher(currentTerm, _lastFetched.opTime);
+ _fetcher = _makeFetcher(currentTerm, _lastFetched.opTime, _getInitialFindMaxTime());
}
OplogFetcher::~OplogFetcher() {
@@ -463,10 +470,14 @@ Milliseconds OplogFetcher::getAwaitDataTimeout_forTest() const {
return _getGetMoreMaxTime();
}
-Milliseconds OplogFetcher::_getFindMaxTime() const {
+Milliseconds OplogFetcher::_getInitialFindMaxTime() const {
return Milliseconds(oplogInitialFindMaxSeconds.load() * 1000);
}
+Milliseconds OplogFetcher::_getRetriedFindMaxTime() const {
+ return Milliseconds(oplogRetriedFindMaxSeconds.load() * 1000);
+}
+
Milliseconds OplogFetcher::_getGetMoreMaxTime() const {
return _awaitDataTimeout;
}
@@ -511,7 +522,7 @@ void OplogFetcher::_callback(const Fetcher::QueryResponseStatus& result,
// Move the old fetcher into the shutting down instance.
_shuttingDownFetcher.swap(_fetcher);
// Create and start fetcher with current term and new starting optime.
- _fetcher = _makeFetcher(currentTerm, _lastFetched.opTime);
+ _fetcher = _makeFetcher(currentTerm, _lastFetched.opTime, _getRetriedFindMaxTime());
auto scheduleStatus = _scheduleFetcher_inlock();
if (scheduleStatus.isOK()) {
log() << "Scheduled new oplog query " << _fetcher->toString();
@@ -704,15 +715,16 @@ void OplogFetcher::_finishCallback(Status status, OpTimeWithHash opTimeWithHash)
}
std::unique_ptr<Fetcher> OplogFetcher::_makeFetcher(long long currentTerm,
- OpTime lastFetchedOpTime) {
+ OpTime lastFetchedOpTime,
+ Milliseconds findMaxTime) {
return stdx::make_unique<Fetcher>(
_executor,
_source,
_nss.db().toString(),
- makeFindCommandObject(_nss, currentTerm, lastFetchedOpTime, _getFindMaxTime()),
+ makeFindCommandObject(_nss, currentTerm, lastFetchedOpTime, findMaxTime),
stdx::bind(&OplogFetcher::_callback, this, stdx::placeholders::_1, stdx::placeholders::_3),
_metadataObject,
- _getFindMaxTime() + kNetworkTimeoutBufferMS,
+ findMaxTime + kNetworkTimeoutBufferMS,
_getGetMoreMaxTime() + kNetworkTimeoutBufferMS);
}
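To make the timeout plumbing above concrete, here is a minimal mongo-shell sketch, with hypothetical helper names, of how the patched fetcher picks the 'find' maxTimeMS and the derived network timeout; the constants mirror the server parameter defaults above:

var oplogInitialFindMaxSeconds = 60;  // default for the first 'find'
var oplogRetriedFindMaxSeconds = 2;   // default for any restarted 'find'
var kNetworkTimeoutBufferMS = 5000;

// The fetcher built at startup uses the generous initial timeout; a fetcher
// built from the restart path in _callback uses the short retried timeout.
function findMaxTimeMS(isRetry) {
    return (isRetry ? oplogRetriedFindMaxSeconds : oplogInitialFindMaxSeconds) * 1000;
}

// The network-level timeout is always maxTimeMS plus a fixed buffer, so the
// remote node gets a chance to reply before the request is abandoned locally.
function networkTimeoutMS(isRetry) {
    return findMaxTimeMS(isRetry) + kNetworkTimeoutBufferMS;
}

networkTimeoutMS(false);  // 65000 ms for the initial 'find'
networkTimeoutMS(true);   // 7000 ms for a retried 'find'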
diff --git a/src/mongo/db/repl/oplog_fetcher.h b/src/mongo/db/repl/oplog_fetcher.h
index 54bfbabbf8d..c567598a66f 100644
--- a/src/mongo/db/repl/oplog_fetcher.h
+++ b/src/mongo/db/repl/oplog_fetcher.h
@@ -237,7 +237,16 @@ private:
/**
* Returns how long the `find` command should wait before timing out.
*/
- virtual Milliseconds _getFindMaxTime() const;
+ virtual Milliseconds _getInitialFindMaxTime() const;
+
+ /**
+ * Returns how long the `find` command should wait before timing out, if we are retrying the
+ * 'find' due to an error. This timeout should be considerably smaller than our initial oplog
+ * find time, since a communication failure with an upstream node may indicate it is
+ * unreachable.
+ */
+ virtual Milliseconds _getRetriedFindMaxTime() const;
+
/**
* Returns how long the `getMore` command should wait before timing out.
@@ -247,7 +256,9 @@ private:
/**
* Creates a new instance of the fetcher to tail the remote oplog starting at the given optime.
*/
- std::unique_ptr<Fetcher> _makeFetcher(long long currentTerm, OpTime lastFetchedOpTime);
+ std::unique_ptr<Fetcher> _makeFetcher(long long currentTerm,
+ OpTime lastFetchedOpTime,
+ Milliseconds findMaxTime);
/**
* Returns whether the oplog fetcher is in shutdown.
diff --git a/src/mongo/db/repl/oplog_fetcher_test.cpp b/src/mongo/db/repl/oplog_fetcher_test.cpp
index 9170c203695..2b25d0bbe02 100644
--- a/src/mongo/db/repl/oplog_fetcher_test.cpp
+++ b/src/mongo/db/repl/oplog_fetcher_test.cpp
@@ -204,6 +204,11 @@ BSONObj OplogFetcherTest::makeOplogQueryMetadataObject(OpTime lastAppliedOpTime,
HostAndPort source("localhost:12345");
NamespaceString nss("local.oplog.rs");
+// For testing, set these network timeouts to match the defaults in the OplogFetcher.
+const Milliseconds kNetworkTimeoutBufferMS{5000};
+const Milliseconds initialFindMaxTime = Milliseconds(60000);
+const Milliseconds retriedFindMaxTime = Milliseconds(2000);
+
ReplSetConfig _createConfig(bool isV1ElectionProtocol) {
BSONObjBuilder bob;
bob.append("_id", "myset");
@@ -1478,6 +1483,99 @@ TEST_F(OplogFetcherTest, OplogFetcherAbortsWithOriginalResponseErrorOnFailureToS
ASSERT_EQUALS(_getOpTimeWithHash(ops[2]), shutdownState->getLastFetched());
}
+TEST_F(OplogFetcherTest, OplogFetcherTimesOutCorrectlyOnInitialFindRequests) {
+ auto ops = _generateOplogEntries(2U);
+ std::size_t maxFetcherRestarts = 0U;
+ auto shutdownState = stdx::make_unique<ShutdownState>();
+ OplogFetcher oplogFetcher(&getExecutor(),
+ _getOpTimeWithHash(ops[0]),
+ source,
+ nss,
+ _createConfig(true),
+ maxFetcherRestarts,
+ rbid,
+ true,
+ dataReplicatorExternalState.get(),
+ enqueueDocumentsFn,
+ stdx::ref(*shutdownState));
+
+ ON_BLOCK_EXIT([this] { getExecutor().shutdown(); });
+
+ ASSERT_OK(oplogFetcher.startup());
+ ASSERT_TRUE(oplogFetcher.isActive());
+
+ auto net = getNet();
+
+ // Schedule a response at a time that would exceed the initial find request network timeout.
+ net->enterNetwork();
+ auto when = net->now() + initialFindMaxTime + kNetworkTimeoutBufferMS + Milliseconds(10);
+ auto noi = getNet()->getNextReadyRequest();
+ RemoteCommandResponse response = {
+ {makeCursorResponse(1, {ops[0], ops[1]})}, rpc::makeEmptyMetadata(), Milliseconds(0)};
+ auto request = net->scheduleSuccessfulResponse(noi, when, response);
+ net->runUntil(when);
+ net->runReadyNetworkOperations();
+ net->exitNetwork();
+
+ oplogFetcher.join();
+
+ // The fetcher should have shut down after its last request timed out.
+ ASSERT_EQUALS(ErrorCodes::NetworkTimeout, shutdownState->getStatus());
+}
+
+TEST_F(OplogFetcherTest, OplogFetcherTimesOutCorrectlyOnRetriedFindRequests) {
+ auto ops = _generateOplogEntries(2U);
+ std::size_t maxFetcherRestarts = 1U;
+ auto shutdownState = stdx::make_unique<ShutdownState>();
+ OplogFetcher oplogFetcher(&getExecutor(),
+ _getOpTimeWithHash(ops[0]),
+ source,
+ nss,
+ _createConfig(true),
+ maxFetcherRestarts,
+ rbid,
+ true,
+ dataReplicatorExternalState.get(),
+ enqueueDocumentsFn,
+ stdx::ref(*shutdownState));
+
+ ON_BLOCK_EXIT([this] { getExecutor().shutdown(); });
+
+ ASSERT_OK(oplogFetcher.startup());
+ ASSERT_TRUE(oplogFetcher.isActive());
+
+ auto net = getNet();
+
+ // Schedule a response at a time that would exceed the initial find request network timeout.
+ net->enterNetwork();
+ auto when = net->now() + initialFindMaxTime + kNetworkTimeoutBufferMS + Milliseconds(10);
+ auto noi = getNet()->getNextReadyRequest();
+ RemoteCommandResponse response = {
+ {makeCursorResponse(1, {ops[0], ops[1]})}, rpc::makeEmptyMetadata(), Milliseconds(0)};
+ auto request = net->scheduleSuccessfulResponse(noi, when, response);
+ net->runUntil(when);
+ net->runReadyNetworkOperations();
+ net->exitNetwork();
+
+ // Schedule a response at a time that would exceed the retried find request network timeout.
+ net->enterNetwork();
+ when = net->now() + retriedFindMaxTime + kNetworkTimeoutBufferMS + Milliseconds(10);
+ noi = getNet()->getNextReadyRequest();
+ response = {
+ {makeCursorResponse(1, {ops[0], ops[1]})}, rpc::makeEmptyMetadata(), Milliseconds(0)};
+ request = net->scheduleSuccessfulResponse(noi, when, response);
+ net->runUntil(when);
+ net->runReadyNetworkOperations();
+ net->exitNetwork();
+
+ oplogFetcher.join();
+
+ // The fetcher should have shut down after its last request timed out.
+ ASSERT_EQUALS(ErrorCodes::NetworkTimeout, shutdownState->getStatus());
+}
+
+
bool sharedCallbackStateDestroyed = false;
class SharedCallbackState {
MONGO_DISALLOW_COPYING(SharedCallbackState);
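The deadline arithmetic in the two tests above, spelled out (the constants mirror those declared near the top of the file): each mocked response is scheduled 10ms past its request's network timeout, guaranteeing the request times out before the response arrives.

var initialFindDeadlineMS = 60000 + 5000 + 10;  // 65010 ms after "now"
var retriedFindDeadlineMS = 2000 + 5000 + 10;   //  7010 ms after the restart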
diff --git a/src/mongo/db/repl/replication_coordinator_external_state.h b/src/mongo/db/repl/replication_coordinator_external_state.h
index 8776bfa8330..dadaaff2d2d 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state.h
@@ -348,9 +348,15 @@ public:
/**
* Returns maximum number of times that the oplog fetcher will consecutively restart the oplog
- * tailing query on non-cancellation errors.
+ * tailing query on non-cancellation errors during steady state replication.
*/
- virtual std::size_t getOplogFetcherMaxFetcherRestarts() const = 0;
+ virtual std::size_t getOplogFetcherSteadyStateMaxFetcherRestarts() const = 0;
+
+ /**
+ * Returns maximum number of times that the oplog fetcher will consecutively restart the oplog
+ * tailing query on non-cancellation errors during initial sync.
+ */
+ virtual std::size_t getOplogFetcherInitialSyncMaxFetcherRestarts() const = 0;
/*
* Creates noop writer instance. Setting the _noopWriter member is not protected by a guard,
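Since the external state interface now exposes two independent restart budgets, they can be inspected and tuned separately at runtime. A quick mongo-shell example using the parameter names introduced by this patch (defaults: 1 for steady state, 10 for initial sync):

// Read both restart budgets in a single getParameter call.
var res = assert.commandWorked(db.adminCommand({
    getParameter: 1,
    oplogFetcherSteadyStateMaxFetcherRestarts: 1,
    oplogFetcherInitialSyncMaxFetcherRestarts: 1
}));

// Steady state fails fast so a healthier sync source can be chosen sooner;
// initial sync is deliberately more patient to avoid redoing a full sync.
assert.commandWorked(db.adminCommand(
    {setParameter: 1, oplogFetcherSteadyStateMaxFetcherRestarts: 2}));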
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index c8737ba0090..597c9b47799 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -131,29 +131,59 @@ MONGO_EXPORT_STARTUP_SERVER_PARAMETER(initialSyncOplogBuffer,
// Set this to specify size of read ahead buffer in the OplogBufferCollection.
MONGO_EXPORT_STARTUP_SERVER_PARAMETER(initialSyncOplogBufferPeekCacheSize, int, 10000);
-// Set this to specify maximum number of times the oplog fetcher will consecutively restart the
-// oplog tailing query on non-cancellation errors.
+// Set this to specify the maximum number of times the oplog fetcher will consecutively restart the
+// oplog tailing query on non-cancellation errors during steady state replication.
server_parameter_storage_type<int, ServerParameterType::kStartupAndRuntime>::value_type
- oplogFetcherMaxFetcherRestarts(3);
-class ExportedOplogFetcherMaxFetcherRestartsServerParameter
+ oplogFetcherSteadyStateMaxFetcherRestarts(1);
+class ExportedOplogFetcherSteadyStateMaxFetcherRestartsServerParameter
: public ExportedServerParameter<int, ServerParameterType::kStartupAndRuntime> {
public:
- ExportedOplogFetcherMaxFetcherRestartsServerParameter();
+ ExportedOplogFetcherSteadyStateMaxFetcherRestartsServerParameter();
Status validate(const int& potentialNewValue) override;
-} _exportedOplogFetcherMaxFetcherRestartsServerParameter;
+} _exportedOplogFetcherSteadyStateMaxFetcherRestartsServerParameter;
-ExportedOplogFetcherMaxFetcherRestartsServerParameter::
- ExportedOplogFetcherMaxFetcherRestartsServerParameter()
+ExportedOplogFetcherSteadyStateMaxFetcherRestartsServerParameter::
+ ExportedOplogFetcherSteadyStateMaxFetcherRestartsServerParameter()
: ExportedServerParameter<int, ServerParameterType::kStartupAndRuntime>(
ServerParameterSet::getGlobal(),
- "oplogFetcherMaxFetcherRestarts",
- &oplogFetcherMaxFetcherRestarts) {}
+ "oplogFetcherSteadyStateMaxFetcherRestarts",
+ &oplogFetcherSteadyStateMaxFetcherRestarts) {}
-Status ExportedOplogFetcherMaxFetcherRestartsServerParameter::validate(
+Status ExportedOplogFetcherSteadyStateMaxFetcherRestartsServerParameter::validate(
const int& potentialNewValue) {
if (potentialNewValue < 0) {
- return Status(ErrorCodes::BadValue,
- "oplogFetcherMaxFetcherRestarts must be greater than or equal to 0");
+ return Status(
+ ErrorCodes::BadValue,
+ "oplogFetcherSteadyStateMaxFetcherRestarts must be greater than or equal to 0");
+ }
+ return Status::OK();
+}
+
+// Set this to specify the maximum number of times the oplog fetcher will consecutively restart the
+// oplog tailing query on non-cancellation errors during initial sync. By default we provide a
+// generous number of restarts to avoid potentially restarting an entire initial sync from scratch.
+server_parameter_storage_type<int, ServerParameterType::kStartupAndRuntime>::value_type
+ oplogFetcherInitialSyncMaxFetcherRestarts(10);
+class ExportedOplogFetcherInitialSyncMaxFetcherRestartsServerParameter
+ : public ExportedServerParameter<int, ServerParameterType::kStartupAndRuntime> {
+public:
+ ExportedOplogFetcherInitialSyncMaxFetcherRestartsServerParameter();
+ Status validate(const int& potentialNewValue) override;
+} _exportedOplogFetcherInitialSyncMaxFetcherRestartsServerParameter;
+
+ExportedOplogFetcherInitialSyncMaxFetcherRestartsServerParameter::
+ ExportedOplogFetcherInitialSyncMaxFetcherRestartsServerParameter()
+ : ExportedServerParameter<int, ServerParameterType::kStartupAndRuntime>(
+ ServerParameterSet::getGlobal(),
+ "oplogFetcherInitialSyncMaxFetcherRestarts",
+ &oplogFetcherInitialSyncMaxFetcherRestarts) {}
+
+Status ExportedOplogFetcherInitialSyncMaxFetcherRestartsServerParameter::validate(
+ const int& potentialNewValue) {
+ if (potentialNewValue < 0) {
+ return Status(
+ ErrorCodes::BadValue,
+ "oplogFetcherInitialSyncMaxFetcherRestarts must be greater than or equal to 0");
}
return Status::OK();
}
@@ -957,8 +987,14 @@ bool ReplicationCoordinatorExternalStateImpl::shouldUseDataReplicatorInitialSync
return !use3dot2InitialSync;
}
-std::size_t ReplicationCoordinatorExternalStateImpl::getOplogFetcherMaxFetcherRestarts() const {
- return oplogFetcherMaxFetcherRestarts;
+std::size_t ReplicationCoordinatorExternalStateImpl::getOplogFetcherSteadyStateMaxFetcherRestarts()
+ const {
+ return oplogFetcherSteadyStateMaxFetcherRestarts.load();
+}
+
+std::size_t ReplicationCoordinatorExternalStateImpl::getOplogFetcherInitialSyncMaxFetcherRestarts()
+ const {
+ return oplogFetcherInitialSyncMaxFetcherRestarts.load();
}
JournalListener::Token ReplicationCoordinatorExternalStateImpl::getToken() {
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.h b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
index 8926f378829..ff2fa102982 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
@@ -116,7 +116,8 @@ public:
virtual std::unique_ptr<OplogBuffer> makeSteadyStateOplogBuffer(
OperationContext* txn) const override;
virtual bool shouldUseDataReplicatorInitialSync() const override;
- virtual std::size_t getOplogFetcherMaxFetcherRestarts() const override;
+ virtual std::size_t getOplogFetcherSteadyStateMaxFetcherRestarts() const override;
+ virtual std::size_t getOplogFetcherInitialSyncMaxFetcherRestarts() const override;
// Methods from JournalListener.
virtual JournalListener::Token getToken();
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
index 6b832728f24..cef211451a6 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
@@ -288,7 +288,13 @@ bool ReplicationCoordinatorExternalStateMock::shouldUseDataReplicatorInitialSync
return true;
}
-std::size_t ReplicationCoordinatorExternalStateMock::getOplogFetcherMaxFetcherRestarts() const {
+std::size_t ReplicationCoordinatorExternalStateMock::getOplogFetcherSteadyStateMaxFetcherRestarts()
+ const {
+ return 0;
+}
+
+std::size_t ReplicationCoordinatorExternalStateMock::getOplogFetcherInitialSyncMaxFetcherRestarts()
+ const {
return 0;
}
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.h b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
index c22e053bf35..7120433bd73 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
@@ -109,7 +109,8 @@ public:
virtual std::unique_ptr<OplogBuffer> makeSteadyStateOplogBuffer(
OperationContext* txn) const override;
virtual bool shouldUseDataReplicatorInitialSync() const override;
- virtual std::size_t getOplogFetcherMaxFetcherRestarts() const override;
+ virtual std::size_t getOplogFetcherSteadyStateMaxFetcherRestarts() const override;
+ virtual std::size_t getOplogFetcherInitialSyncMaxFetcherRestarts() const override;
/**
* Adds "host" to the list of hosts that this mock will match when responding to "isSelf"
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 10b60b7a205..eec1b8989ae 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -289,7 +289,8 @@ InitialSyncerOptions createInitialSyncerOptions(
options.getSlaveDelay = [replCoord]() { return replCoord->getSlaveDelaySecs(); };
options.syncSourceSelector = replCoord;
options.replBatchLimitBytes = dur::UncommittedBytesLimit;
- options.oplogFetcherMaxFetcherRestarts = externalState->getOplogFetcherMaxFetcherRestarts();
+ options.oplogFetcherMaxFetcherRestarts =
+ externalState->getOplogFetcherInitialSyncMaxFetcherRestarts();
return options;
}
} // namespace