From 8f1516a1b43656ab113b0a0babdd42e5222fb731 Mon Sep 17 00:00:00 2001 From: Scott Hernandez Date: Fri, 5 Feb 2016 10:56:42 -0500 Subject: SERVER-22452: log message when fetching the oplog ends abruptly --- src/mongo/db/repl/bgsync.cpp | 36 +++++++++++++++++------------------- src/mongo/db/repl/bgsync.h | 14 -------------- 2 files changed, 17 insertions(+), 33 deletions(-) diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp index 439ae5a0ba8..50a9e69c0e2 100644 --- a/src/mongo/db/repl/bgsync.cpp +++ b/src/mongo/db/repl/bgsync.cpp @@ -450,21 +450,24 @@ void BackgroundSync::_fetcherCallback(const StatusWith& // if target cut connections between connecting and querying (for // example, because it stepped down) we might not have a cursor if (!result.isOK()) { + LOG(2) << "Error returned from oplog query: " << result.getStatus(); return; } if (inShutdown()) { + LOG(2) << "Interrupted by shutdown while querying oplog. 1"; // 1st instance. return; } // Check if we have been stopped. if (isStopped()) { + LOG(2) << "Interrupted by stop request while querying the oplog. 1"; // 1st instance. return; } const auto& queryResponse = result.getValue(); bool syncSourceHasSyncSource = false; - OpTime sourcesLastOp; + OpTime sourcesLastOpTime; // Forward metadata (containing liveness information) to replication coordinator. bool receivedMetadata = @@ -483,7 +486,7 @@ void BackgroundSync::_fetcherCallback(const StatusWith& _replCoord->cancelAndRescheduleElectionTimeout(); } syncSourceHasSyncSource = metadata.getSyncSourceIndex() != -1; - sourcesLastOp = metadata.getLastOpVisible(); + sourcesLastOpTime = metadata.getLastOpVisible(); } const auto& documents = queryResponse.documents; @@ -520,7 +523,8 @@ void BackgroundSync::_fetcherCallback(const StatusWith& // No work to do if we are draining/primary. if (_replCoord->isWaitingForApplierToDrain() || _replCoord->getMemberState().primary()) { - LOG(1) << "waiting for draining or we are primary, not adding more ops to buffer"; + LOG(2) << "Interrupted by waiting for applier to drain " + << "or becoming primary while querying the oplog. 1"; // 1st instance. return; } @@ -531,6 +535,7 @@ void BackgroundSync::_fetcherCallback(const StatusWith& stdx::unique_lock lock(_mutex); // If we are stopped then return without queueing this batch to apply. if (_stopped) { + LOG(2) << "Interrupted by stop request while querying the oplog. 2"; // 2nd instance. return; } lastTS = _lastOpTimeFetched.getTimestamp(); @@ -620,22 +625,30 @@ void BackgroundSync::_fetcherCallback(const StatusWith& } if (inShutdown()) { + LOG(2) << "Interrupted by shutdown while querying oplog. 2"; // 2nd instance. return; } // If we are transitioning to primary state, we need to leave // this loop in order to go into bgsync-stop mode. if (_replCoord->isWaitingForApplierToDrain() || _replCoord->getMemberState().primary()) { + LOG(2) << "Interrupted by waiting for applier to drain " + << "or becoming primary while querying the oplog. 2"; // 2nd instance. return; } // re-evaluate quality of sync target - if (_shouldChangeSyncSource(source, sourcesLastOp, syncSourceHasSyncSource)) { + if (getSyncTarget().empty() || + _replCoord->shouldChangeSyncSource(source, sourcesLastOpTime, syncSourceHasSyncSource)) { + LOG(1) << "Cancelling oplog query because we have to choose a sync source. Current source: " + << source << ", OpTime" << sourcesLastOpTime + << ", hasSyncSource:" << syncSourceHasSyncSource; return; } // Check if we have been stopped. if (isStopped()) { + LOG(2) << "Interrupted by a stop request while fetching the oplog so starting a new query."; return; } @@ -651,21 +664,6 @@ void BackgroundSync::_fetcherCallback(const StatusWith& } } -bool BackgroundSync::_shouldChangeSyncSource(const HostAndPort& syncSource, - const OpTime& syncSourceLastOpTime, - bool syncSourceHasSyncSource) { - // is it even still around? - if (getSyncTarget().empty() || syncSource.empty()) { - return true; - } - - // check other members: is any member's optime more than MaxSyncSourceLag seconds - // ahead of the current sync source? - return _replCoord->shouldChangeSyncSource( - syncSource, syncSourceLastOpTime, syncSourceHasSyncSource); -} - - bool BackgroundSync::peek(BSONObj* op) { return _buffer.peek(*op); } diff --git a/src/mongo/db/repl/bgsync.h b/src/mongo/db/repl/bgsync.h index 489790f988d..9deaf315193 100644 --- a/src/mongo/db/repl/bgsync.h +++ b/src/mongo/db/repl/bgsync.h @@ -197,20 +197,6 @@ private: const HostAndPort& source, stdx::function getConnection); - /** - * Evaluate if the current sync source is still good. - * "syncSource" is the name of the current sync source, which will be used to look up the - * member's heartbeat data. - * "syncSourceLastOpTime" is the last OpTime the sync source has. This is passed in because the - * data stored from heartbeats could be too stale and would cause unnecessary sync source - * changes. - * "syncSourceHasSyncSource" indicates whether our sync source is currently syncing from another - * member. - */ - bool _shouldChangeSyncSource(const HostAndPort& syncSource, - const OpTime& syncSourceLastOpTime, - bool syncSourceHasSyncSource); - // restart syncing void start(OperationContext* txn); -- cgit v1.2.1