summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dianna Hohensee <dianna.hohensee@mongodb.com> 2019-11-05 19:58:36 +0000
committer: evergreen <evergreen@mongodb.com> 2019-11-05 19:58:36 +0000
commit: ec44fef95a3e7a6620df1ff67796b9d4566aeecb (patch)
tree: 7fdc6b1251eeaec380479a3c09081da835f9f182
parent: 9561ea73bc0004fc1835430f9789546484c1e7e7 (diff)
download: mongo-ec44fef95a3e7a6620df1ff67796b9d4566aeecb.tar.gz
SERVER-41391 clear the oplogTruncateAfterPoint timestamp on stepdown after there are no more active writes
-rw-r--r--src/mongo/db/repl/replication_consistency_markers.h8
-rw-r--r--src/mongo/db/repl/replication_consistency_markers_impl.cpp34
-rw-r--r--src/mongo/db/repl/replication_consistency_markers_impl.h1
-rw-r--r--src/mongo/db/repl/replication_consistency_markers_mock.cpp3
-rw-r--r--src/mongo/db/repl/replication_consistency_markers_mock.h1
-rw-r--r--src/mongo/db/repl/replication_coordinator_external_state.h6
-rw-r--r--src/mongo/db/repl/replication_coordinator_external_state_impl.cpp20
-rw-r--r--src/mongo/db/repl/replication_coordinator_external_state_impl.h1
-rw-r--r--src/mongo/db/repl/replication_coordinator_external_state_mock.cpp2
-rw-r--r--src/mongo/db/repl/replication_coordinator_external_state_mock.h1
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp6
11 files changed, 83 insertions, 0 deletions
diff --git a/src/mongo/db/repl/replication_consistency_markers.h b/src/mongo/db/repl/replication_consistency_markers.h
index 894aec89f66..c59abdac678 100644
--- a/src/mongo/db/repl/replication_consistency_markers.h
+++ b/src/mongo/db/repl/replication_consistency_markers.h
@@ -144,6 +144,14 @@ public:
// -------- Oplog Truncate After Point ----------
/**
+ * Ensures that the fast-count counter for the oplogTruncateAfterPoint collection is properly
+ * set. An unclean shutdown can result in a miscount, if the persisted size store is not updated
+ * before the crash. Rollback usually handles this for user collections, but local, unreplicated
+ * collections are not adjusted.
+ */
+ virtual void ensureFastCountOnOplogTruncateAfterPoint(OperationContext* opCtx) = 0;
+
+ /**
* The oplog truncate after point is set to the beginning of a batch of oplog entries before
* the oplog entries are written into the oplog, and reset before we begin applying the batch.
* On startup all oplog entries with a value >= the oplog truncate after point should be
diff --git a/src/mongo/db/repl/replication_consistency_markers_impl.cpp b/src/mongo/db/repl/replication_consistency_markers_impl.cpp
index 406ad96c9fd..d45aa9c492f 100644
--- a/src/mongo/db/repl/replication_consistency_markers_impl.cpp
+++ b/src/mongo/db/repl/replication_consistency_markers_impl.cpp
@@ -308,6 +308,40 @@ ReplicationConsistencyMarkersImpl::_getOplogTruncateAfterPointDocument(
return oplogTruncateAfterPoint;
}
+void ReplicationConsistencyMarkersImpl::ensureFastCountOnOplogTruncateAfterPoint(
+ OperationContext* opCtx) {
+ LOG(3) << "Updating cached fast-count on collection " << _oplogTruncateAfterPointNss
+ << " in case an unclean shutdown caused it to become incorrect.";
+
+ auto result = _storageInterface->findSingleton(opCtx, _oplogTruncateAfterPointNss);
+
+ if (result.getStatus() == ErrorCodes::NamespaceNotFound) {
+ return;
+ }
+
+ if (result.getStatus() == ErrorCodes::CollectionIsEmpty) {
+ // The count is updated before successful commit of a write, so unclean shutdown can leave
+ // the value incorrectly set to one.
+ invariant(
+ _storageInterface->setCollectionCount(opCtx, _oplogTruncateAfterPointNss, 0).isOK());
+ return;
+ }
+
+ if (result.getStatus() == ErrorCodes::TooManyMatchingDocuments) {
+ fassert(51265,
+ {result.getStatus().code(),
+ str::stream() << "More than one document was found in the '"
+ << kDefaultOplogTruncateAfterPointNamespace
+ << "' collection. Users should not write to this collection. Please "
+ "delete the excess documents"});
+ }
+ fassert(51266, result.getStatus());
+
+ // We can safely set a count of one. We know that we only ever write one document, and the
+ // success of findSingleton above confirms only one document exists in the collection.
+ invariant(_storageInterface->setCollectionCount(opCtx, _oplogTruncateAfterPointNss, 1).isOK());
+}
+
void ReplicationConsistencyMarkersImpl::_upsertOplogTruncateAfterPointDocument(
OperationContext* opCtx, const BSONObj& updateSpec) {
fassert(40512,
diff --git a/src/mongo/db/repl/replication_consistency_markers_impl.h b/src/mongo/db/repl/replication_consistency_markers_impl.h
index cb3b8ecf6cc..4cb924eeea0 100644
--- a/src/mongo/db/repl/replication_consistency_markers_impl.h
+++ b/src/mongo/db/repl/replication_consistency_markers_impl.h
@@ -69,6 +69,7 @@ public:
void setMinValid(OperationContext* opCtx, const OpTime& minValid) override;
void setMinValidToAtLeast(OperationContext* opCtx, const OpTime& minValid) override;
+ void ensureFastCountOnOplogTruncateAfterPoint(OperationContext* opCtx) override;
void setOplogTruncateAfterPoint(OperationContext* opCtx, const Timestamp& timestamp) override;
Timestamp getOplogTruncateAfterPoint(OperationContext* opCtx) const override;
diff --git a/src/mongo/db/repl/replication_consistency_markers_mock.cpp b/src/mongo/db/repl/replication_consistency_markers_mock.cpp
index 5c698190445..0001bdc2616 100644
--- a/src/mongo/db/repl/replication_consistency_markers_mock.cpp
+++ b/src/mongo/db/repl/replication_consistency_markers_mock.cpp
@@ -80,6 +80,9 @@ void ReplicationConsistencyMarkersMock::setMinValidToAtLeast(OperationContext* o
_minValid = std::max(_minValid, minValid);
}
+void ReplicationConsistencyMarkersMock::ensureFastCountOnOplogTruncateAfterPoint(
+ OperationContext* opCtx) {}
+
void ReplicationConsistencyMarkersMock::setOplogTruncateAfterPoint(OperationContext* opCtx,
const Timestamp& timestamp) {
stdx::lock_guard<Latch> lock(_minValidBoundariesMutex);
diff --git a/src/mongo/db/repl/replication_consistency_markers_mock.h b/src/mongo/db/repl/replication_consistency_markers_mock.h
index 3fe3c2670f5..eff8bf2961b 100644
--- a/src/mongo/db/repl/replication_consistency_markers_mock.h
+++ b/src/mongo/db/repl/replication_consistency_markers_mock.h
@@ -61,6 +61,7 @@ public:
void setMinValid(OperationContext* opCtx, const OpTime& minValid) override;
void setMinValidToAtLeast(OperationContext* opCtx, const OpTime& minValid) override;
+ void ensureFastCountOnOplogTruncateAfterPoint(OperationContext* opCtx) override;
void setOplogTruncateAfterPoint(OperationContext* opCtx, const Timestamp& timestamp) override;
Timestamp getOplogTruncateAfterPoint(OperationContext* opCtx) const override;
diff --git a/src/mongo/db/repl/replication_coordinator_external_state.h b/src/mongo/db/repl/replication_coordinator_external_state.h
index dd6f4e507ac..e16c35fb016 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state.h
@@ -218,6 +218,12 @@ public:
virtual void shardingOnStepDownHook() = 0;
/**
+ * Clears oplog visibility state. All of the oplog is safely visible because there are no oplog
+ * writes during stepdown.
+ */
+ virtual void clearOplogVisibilityStateForStepDown() = 0;
+
+ /**
* Notifies the bgsync and syncSourceFeedback threads to choose a new sync source.
*/
virtual void signalApplierToChooseNewSyncSource() = 0;
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index 23611a79206..2d9712e5a11 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -698,6 +698,26 @@ void ReplicationCoordinatorExternalStateImpl::shardingOnStepDownHook() {
}
}
+void ReplicationCoordinatorExternalStateImpl::clearOplogVisibilityStateForStepDown() {
+ auto opCtx = cc().getOperationContext();
+ // Temporarily turn off flow control ticketing. Getting a ticket can stall on a ticket being
+ // available, which may have to wait for the ticket refresher to run, which in turn blocks on
+ // the repl _mutex to check whether we are primary or not: this is a deadlock because stepdown
+ // already holds the repl _mutex!
+ auto originalFlowControlSetting = opCtx->shouldParticipateInFlowControl();
+ ON_BLOCK_EXIT([&] { opCtx->setShouldParticipateInFlowControl(originalFlowControlSetting); });
+ opCtx->setShouldParticipateInFlowControl(false);
+
+ // We can clear the oplogTruncateAfterPoint because we know there are no concurrent user writes
+ // during stepdown and therefore presently no oplog holes.
+ //
+ // This value is updated periodically while in PRIMARY mode to protect against oplog holes on
+ // unclean shutdown. The value must then be cleared on stepdown because stepup expects the value
+ // to be unset. Batch application, in mode SECONDARY, also uses the value to protect against
+ // unclean shutdown, and will handle both setting AND unsetting the value.
+ _replicationProcess->getConsistencyMarkers()->setOplogTruncateAfterPoint(opCtx, Timestamp());
+}
+
void ReplicationCoordinatorExternalStateImpl::_shardingOnTransitionToPrimaryHook(
OperationContext* opCtx) {
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.h b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
index fc52e77f280..4f25122898f 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
@@ -93,6 +93,7 @@ public:
virtual HostAndPort getClientHostAndPort(const OperationContext* opCtx);
virtual void closeConnections();
virtual void shardingOnStepDownHook();
+ virtual void clearOplogVisibilityStateForStepDown() override;
virtual void signalApplierToChooseNewSyncSource();
virtual void stopProducer();
virtual void startProducerIfStopped();
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
index 75bdac91439..053f4d460a8 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
@@ -228,6 +228,8 @@ void ReplicationCoordinatorExternalStateMock::closeConnections() {
void ReplicationCoordinatorExternalStateMock::shardingOnStepDownHook() {}
+void ReplicationCoordinatorExternalStateMock::clearOplogVisibilityStateForStepDown() {}
+
void ReplicationCoordinatorExternalStateMock::signalApplierToChooseNewSyncSource() {}
void ReplicationCoordinatorExternalStateMock::stopProducer() {}
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.h b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
index 0f1ed300583..afcc16ce995 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
@@ -84,6 +84,7 @@ public:
virtual StatusWith<OpTimeAndWallTime> loadLastOpTimeAndWallTime(OperationContext* opCtx);
virtual void closeConnections();
virtual void shardingOnStepDownHook();
+ virtual void clearOplogVisibilityStateForStepDown() override;
virtual void signalApplierToChooseNewSyncSource();
virtual void stopProducer();
virtual void startProducerIfStopped();
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 3fe72a5d6f1..b391fa27cdf 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -417,6 +417,10 @@ bool ReplicationCoordinatorImpl::_startLoadLocalConfig(OperationContext* opCtx)
// initial sync has completed, it also sees these collections.
fassert(50708, _replicationProcess->getConsistencyMarkers()->createInternalCollections(opCtx));
+ // Ensure (update if needed) the in-memory count for the oplogTruncateAfterPoint collection
+ // matches the collection contents.
+ _replicationProcess->getConsistencyMarkers()->ensureFastCountOnOplogTruncateAfterPoint(opCtx);
+
_replicationProcess->getConsistencyMarkers()->initializeMinValidDocument(opCtx);
fassert(51240, _externalState->createLocalLastVoteCollection(opCtx));
@@ -2992,8 +2996,10 @@ void ReplicationCoordinatorImpl::_performPostMemberStateUpdateAction(
_externalState->closeConnections();
/* FALLTHROUGH */
case kActionSteppedDown:
+ // This code must be safe to run on node rollback and node removal!
_externalState->shardingOnStepDownHook();
_externalState->stopNoopWriter();
+ _externalState->clearOplogVisibilityStateForStepDown();
break;
case kActionStartSingleNodeElection:
// In protocol version 1, single node replset will run an election instead of