summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Dianna Hohensee <dianna.hohensee@mongodb.com> 2020-03-19 13:00:46 -0400
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-03-20 21:54:42 +0000
commit: 9309d0b1dfb78b700e765c91d0122c83a37edc41 (patch)
tree: bebc8008c19808c052adce0cba9944d6733a5b18 /src
parent: 02e12f2b4acfb8f1d401ed78a04fe4b6e23b9976 (diff)
download: mongo-9309d0b1dfb78b700e765c91d0122c83a37edc41.tar.gz
SERVER-46984 Stop async updates to the oplogTruncateAfterPoint during primary shutdown that can race with clearing the oplogTruncateAfterPoint for primary clean shutdown.
(cherry picked from commit 5f3e1db10472fcd57615424c10372444a2c8427f)
Diffstat (limited to 'src')
-rw-r--r-- src/mongo/db/repl/replication_coordinator_external_state.h | 14
-rw-r--r-- src/mongo/db/repl/replication_coordinator_external_state_impl.cpp | 14
-rw-r--r-- src/mongo/db/repl/replication_coordinator_external_state_impl.h | 2
-rw-r--r-- src/mongo/db/repl/replication_coordinator_external_state_mock.cpp | 2
-rw-r--r-- src/mongo/db/repl/replication_coordinator_external_state_mock.h | 2
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp | 2
6 files changed, 22 insertions, 14 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_external_state.h b/src/mongo/db/repl/replication_coordinator_external_state.h
index a2fdb9a1d71..5bce5d1b06d 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state.h
@@ -227,10 +227,18 @@ public:
virtual void shardingOnStepDownHook() = 0;
/**
- * Clears oplog visibility state. All of the oplog is safely visible because there are no oplog
- * writes during stepdown.
+ * Stops asynchronous updates to and then clears the oplogTruncateAfterPoint.
+ *
+ * Safe to call when there are no oplog writes, and therefore no oplog holes that must be
+ * tracked by the oplogTruncateAfterPoint.
+ *
+ * Only primaries update the truncate point asynchronously; other replication states update the
+ * truncate point manually as necessary. This function should be called whenever replication
+ * leaves state PRIMARY: stepdown; and shutdown while in state PRIMARY. Otherwise, we might
+ * leave a stale oplogTruncateAfterPoint set and cause unnecessary oplog truncation during
+ * startup if the server gets restarted.
*/
- virtual void clearOplogVisibilityStateForStepDown() = 0;
+ virtual void stopAsyncUpdatesOfAndClearOplogTruncateAfterPoint() = 0;
/**
* Notifies the bgsync and syncSourceFeedback threads to choose a new sync source.
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index 514f95dc4fa..54024dde95c 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -408,15 +408,15 @@ void ReplicationCoordinatorExternalStateImpl::shutdown(OperationContext* opCtx)
// _taskExecutor pointer never changes.
_taskExecutor->join();
- // Clear the truncate point if we are still primary, so nothing gets truncated unnecessarily on
- // startup. There are no oplog holes on clean primary shutdown. Stepdown is similarly safe and
- // clears the truncate point. The other replication states do need truncation if the truncate
- // point is set: e.g. interruption mid batch application can leave oplog holes.
+ // The oplog truncate after point must be cleared, if we are still primary for shutdown, so
+ // nothing gets truncated unnecessarily on startup. There are no oplog holes on clean primary
+ // shutdown. Stepdown is similarly safe from holes and halts updates to and clears the truncate
+ // point. The other replication states do need truncation if the truncate point is set: e.g.
+ // interruption mid batch application can leave oplog holes.
if (!storageGlobalParams.readOnly &&
_replicationProcess->getConsistencyMarkers()
->isOplogTruncateAfterPointBeingUsedForPrimary()) {
- _replicationProcess->getConsistencyMarkers()->setOplogTruncateAfterPoint(opCtx,
- Timestamp());
+ stopAsyncUpdatesOfAndClearOplogTruncateAfterPoint();
}
}
@@ -768,7 +768,7 @@ void ReplicationCoordinatorExternalStateImpl::shardingOnStepDownHook() {
}
}
-void ReplicationCoordinatorExternalStateImpl::clearOplogVisibilityStateForStepDown() {
+void ReplicationCoordinatorExternalStateImpl::stopAsyncUpdatesOfAndClearOplogTruncateAfterPoint() {
auto opCtx = cc().getOperationContext();
// Temporarily turn off flow control ticketing. Getting a ticket can stall on a ticket being
// available, which may have to wait for the ticket refresher to run, which in turn blocks on
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.h b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
index 1d5d904c0f4..5a3b52229bf 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
@@ -96,7 +96,7 @@ public:
virtual HostAndPort getClientHostAndPort(const OperationContext* opCtx);
virtual void closeConnections();
virtual void shardingOnStepDownHook();
- virtual void clearOplogVisibilityStateForStepDown() override;
+ virtual void stopAsyncUpdatesOfAndClearOplogTruncateAfterPoint() override;
virtual void signalApplierToChooseNewSyncSource();
virtual void stopProducer();
virtual void startProducerIfStopped();
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
index e2b0ee9ecfd..5d6305a2c79 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
@@ -230,7 +230,7 @@ void ReplicationCoordinatorExternalStateMock::closeConnections() {
void ReplicationCoordinatorExternalStateMock::shardingOnStepDownHook() {}
-void ReplicationCoordinatorExternalStateMock::clearOplogVisibilityStateForStepDown() {}
+void ReplicationCoordinatorExternalStateMock::stopAsyncUpdatesOfAndClearOplogTruncateAfterPoint() {}
void ReplicationCoordinatorExternalStateMock::signalApplierToChooseNewSyncSource() {}
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.h b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
index be37389312a..1444eaeb1ef 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
@@ -85,7 +85,7 @@ public:
virtual StatusWith<OpTimeAndWallTime> loadLastOpTimeAndWallTime(OperationContext* opCtx);
virtual void closeConnections();
virtual void shardingOnStepDownHook();
- virtual void clearOplogVisibilityStateForStepDown() override;
+ virtual void stopAsyncUpdatesOfAndClearOplogTruncateAfterPoint() override;
virtual void signalApplierToChooseNewSyncSource();
virtual void stopProducer();
virtual void startProducerIfStopped();
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index bf516432716..023a20ad13d 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -3735,7 +3735,7 @@ void ReplicationCoordinatorImpl::_performPostMemberStateUpdateAction(
// This code must be safe to run on node rollback and node removal!
_externalState->shardingOnStepDownHook();
_externalState->stopNoopWriter();
- _externalState->clearOplogVisibilityStateForStepDown();
+ _externalState->stopAsyncUpdatesOfAndClearOplogTruncateAfterPoint();
break;
case kActionStartSingleNodeElection:
// In protocol version 1, single node replset will run an election instead of