author     Suganthi Mani <suganthi.mani@mongodb.com>  2020-01-16 21:57:40 +0000
committer  A. Jesse Jiryu Davis <jesse@mongodb.com>   2020-01-27 15:40:36 -0500
commit     7f5abac0250d152fc9cfa44effe45a842778b450 (patch)
tree       995c4965a1d889b1b0d6e9dedbca4c269e82a3d7
parent     5903e36fcf50d0bb7d362b1a9d5aa417d495532a (diff)
download   mongo-7f5abac0250d152fc9cfa44effe45a842778b450.tar.gz
SERVER-41333 Make it safer for initial sync and shutdown to write the minValid document at lastApplied.
-rw-r--r--  src/mongo/db/db.cpp                                                 30
-rw-r--r--  src/mongo/db/repl/replication_consistency_markers_impl.cpp          15
-rw-r--r--  src/mongo/db/repl/replication_coordinator.h                          9
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state.h           9
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_impl.cpp   53
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_impl.h      3
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_mock.cpp    3
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_mock.h      1
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp                   4
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.h                     2
-rw-r--r--  src/mongo/db/repl/replication_coordinator_mock.cpp                   4
-rw-r--r--  src/mongo/db/repl/replication_coordinator_mock.h                     2
-rw-r--r--  src/mongo/db/repl/replication_coordinator_noop.cpp                   2
-rw-r--r--  src/mongo/db/repl/replication_coordinator_noop.h                     2
-rw-r--r--  src/mongo/dbtests/storage_timestamp_tests.cpp                        2
-rw-r--r--  src/mongo/embedded/replication_coordinator_embedded.cpp              2
-rw-r--r--  src/mongo/embedded/replication_coordinator_embedded.h                2
17 files changed, 113 insertions, 32 deletions
diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp
index 8b1c28c11b2..65fb74748a5 100644
--- a/src/mongo/db/db.cpp
+++ b/src/mongo/db/db.cpp
@@ -992,19 +992,37 @@ void shutdownTask(const ShutdownTaskArgs& shutdownArgs) {
repl::ReplicationStateTransitionLockGuard rstl(
opCtx, MODE_X, repl::ReplicationStateTransitionLockGuard::EnqueueOnly());
- // Kill all operations. After this point, the opCtx will have been marked as killed and will
- // not be usable other than to kill all transactions directly below.
+ // Kill all operations, and make any newly created opCtx be immediately interrupted.
+ // After this point, the opCtx will have been marked as killed and will not be usable other
+ // than to kill all transactions directly below.
serviceContext->setKillAllOperations();
// Destroy all stashed transaction resources, in order to release locks.
killSessionsLocalShutdownAllTransactions(opCtx);
rstl.waitForLockUntil(Date_t::max());
- }
- // Shuts down the thread pool and waits for index builds to finish.
- // Depends on setKillAllOperations() above to interrupt the index build operations.
- IndexBuildsCoordinator::get(serviceContext)->shutdown();
+ // Release the RSTL before waiting for the index build threads to join, as index builds
+ // reacquire the RSTL (in an uninterruptible lock guard) to finish their cleanup.
+ rstl.release();
+
+ // Shuts down the thread pool and waits for index builds to finish.
+ // Depends on setKillAllOperations() above to interrupt the index build operations.
+ IndexBuildsCoordinator::get(serviceContext)->shutdown();
+
+ // No new readers can come in after releasing the RSTL because, before releasing it, we
+ // made sure that all new operations will be immediately interrupted by setting
+ // ServiceContext::_globalKill to true. Reacquire the RSTL in mode X.
+ rstl.reacquire();
+
+ // We are expected to have no active readers while performing the
+ // markAsCleanShutdownIfPossible() step. We guarantee that there are no active readers at
+ // this point because:
+ // 1) We hold the RSTL in mode X; all readers (except single-phase hybrid index builds on
+ //    secondaries) are expected to hold the RSTL in mode IX.
+ // 2) We have waited for all index builds to finish.
+ repl::ReplicationCoordinator::get(serviceContext)->markAsCleanShutdownIfPossible(opCtx);
+ }
ReplicaSetMonitor::shutdown();
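
For readers who prefer the post-patch code to the hunk above, the resulting ordering inside shutdownTask() is roughly the following. This is a condensed sketch assembled from the diff; the surrounding shutdown steps and most comments are elided, so it is not a compilable excerpt of db.cpp.

    repl::ReplicationStateTransitionLockGuard rstl(
        opCtx, MODE_X, repl::ReplicationStateTransitionLockGuard::EnqueueOnly());
    serviceContext->setKillAllOperations();           // interrupt existing ops and any new opCtx
    killSessionsLocalShutdownAllTransactions(opCtx);  // release locks held by stashed transactions
    rstl.waitForLockUntil(Date_t::max());             // finish acquiring the RSTL in mode X

    rstl.release();                                   // index builds reacquire the RSTL to clean up
    IndexBuildsCoordinator::get(serviceContext)->shutdown();
    rstl.reacquire();                                 // mode X again; no new readers can start

    repl::ReplicationCoordinator::get(serviceContext)->markAsCleanShutdownIfPossible(opCtx);
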
diff --git a/src/mongo/db/repl/replication_consistency_markers_impl.cpp b/src/mongo/db/repl/replication_consistency_markers_impl.cpp
index d45aa9c492f..8d7046a89f2 100644
--- a/src/mongo/db/repl/replication_consistency_markers_impl.cpp
+++ b/src/mongo/db/repl/replication_consistency_markers_impl.cpp
@@ -155,12 +155,15 @@ void ReplicationConsistencyMarkersImpl::clearInitialSyncFlag(OperationContext* o
<< MinValidDocument::kMinValidTermFieldName << time.getTerm()
<< MinValidDocument::kAppliedThroughFieldName << time));
- // We clear the initial sync flag at the 'lastAppliedOpTime'. This is unnecessary, since there
- // should not be any stable checkpoints being taken that this write could inadvertantly enter.
- // This 'lastAppliedOpTime' will be the first stable timestamp candidate, so it will be in the
- // first stable checkpoint taken after initial sync. This provides more clarity than providing
- // no timestamp.
- update.timestamp = time.getTimestamp();
+ // Since we haven't yet updated our initialDataTimestamp from
+ // Timestamp::kAllowUnstableCheckpointsSentinel to lastAppliedTimestamp, we are only allowed
+ // to take unstable checkpoints, and lastAppliedTimestamp will be the first stable timestamp
+ // candidate, so it will be reflected in the first stable checkpoint taken after initial
+ // sync. There is therefore no way this minValid update can be part of a stable checkpoint
+ // taken earlier than lastAppliedTimestamp, so it is safe to make it an untimestamped write.
+ // It also has to be untimestamped because we may have readers at lastAppliedTimestamp, and
+ // committing storage writes before or at such read timestamps is illegal.
+ update.timestamp = Timestamp();
_updateMinValidDocument(opCtx, update);
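
Condensed, the behavioral change in clearInitialSyncFlag() is just the timestamp placed on the minValid update. A minimal before/after sketch using the names from the hunk above, where 'update' is the minValid document update built a few lines earlier and 'time' is the optime whose timestamp was previously used:

    // Before this patch: the update carried the lastApplied timestamp.
    update.timestamp = time.getTimestamp();

    // After this patch: a null timestamp makes it an untimestamped write, so it cannot be pulled
    // into a checkpoint earlier than lastAppliedTimestamp and cannot conflict with readers whose
    // read timestamp is lastAppliedTimestamp.
    update.timestamp = Timestamp();
    _updateMinValidDocument(opCtx, update);
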
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h
index b46b7ed6804..b0433698b2e 100644
--- a/src/mongo/db/repl/replication_coordinator.h
+++ b/src/mongo/db/repl/replication_coordinator.h
@@ -134,6 +134,15 @@ public:
virtual void shutdown(OperationContext* opCtx) = 0;
/**
+ * Performs some bookkeeping to make sure that this is a clean shutdown (i.e. the data set
+ * is consistent with the top of the oplog).
+ * This should be called after calling shutdown(), and callers should make sure that there
+ * are no active readers while executing this method, as it performs timestamped storage
+ * writes at lastAppliedTimestamp.
+ */
+ virtual void markAsCleanShutdownIfPossible(OperationContext* opCtx) = 0;
+
+ /**
* Returns a reference to the parsed command line arguments that are related to replication.
*/
virtual const ReplSettings& getSettings() const = 0;
diff --git a/src/mongo/db/repl/replication_coordinator_external_state.h b/src/mongo/db/repl/replication_coordinator_external_state.h
index e16c35fb016..a2fdb9a1d71 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state.h
@@ -102,6 +102,15 @@ public:
virtual void shutdown(OperationContext* opCtx) = 0;
/**
+ * Clears appliedThrough to indicate that the data set is consistent with the top of the
+ * oplog on shutdown.
+ * This should be called after calling shutdown(), and should be called holding the RSTL
+ * in mode X to make sure that there are no active readers while executing this method,
+ * as it performs timestamped minValid writes at lastAppliedTimestamp.
+ */
+ virtual void clearAppliedThroughIfCleanShutdown(OperationContext* opCtx) = 0;
+
+ /**
* Returns task executor for scheduling tasks to be run asynchronously.
*/
virtual executor::TaskExecutor* getTaskExecutor() const = 0;
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index ee09e53c44a..7e8fb6ba89a 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -340,6 +340,40 @@ void ReplicationCoordinatorExternalStateImpl::startThreads(const ReplSettings& s
_startedThreads = true;
}
+void ReplicationCoordinatorExternalStateImpl::clearAppliedThroughIfCleanShutdown(
+ OperationContext* opCtx) {
+ {
+ stdx::lock_guard<Latch> lk(_threadMutex);
+ if (!_startedThreads) {
+ return;
+ }
+ invariant(_inShutdown);
+ }
+
+ // Ensure that all writes are visible before reading. If we failed mid-batch, it would be
+ // possible to read from a kLastApplied ReadSource where not all writes to the minValid document
+ // are visible, generating a writeConflict that would not resolve.
+ opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
+
+ auto loadLastOpTimeAndWallTimeResult = loadLastOpTimeAndWallTime(opCtx);
+ if (_replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx).isNull() &&
+ loadLastOpTimeAndWallTimeResult.isOK() &&
+ loadLastOpTimeAndWallTimeResult.getValue().opTime ==
+ _replicationProcess->getConsistencyMarkers()->getAppliedThrough(opCtx)) {
+ // Clear the appliedThrough marker to indicate we are consistent with the top of the
+ // oplog. We record this update at the 'lastAppliedOpTime'. If there are any outstanding
+ // checkpoints being taken, they should only reflect this write if they see all writes up
+ // to our 'lastAppliedOpTime'.
+ auto lastAppliedOpTime = repl::ReplicationCoordinator::get(opCtx)->getMyLastAppliedOpTime();
+
+ invariant(opCtx->lockState()->isRSTLExclusive());
+ // Since we acquired the RSTL in mode X, there can't be any active readers, so it's safe
+ // to write the minValid document to storage.
+ _replicationProcess->getConsistencyMarkers()->clearAppliedThrough(
+ opCtx, lastAppliedOpTime.getTimestamp());
+ }
+}
+
void ReplicationCoordinatorExternalStateImpl::shutdown(OperationContext* opCtx) {
stdx::unique_lock<Latch> lk(_threadMutex);
_inShutdown = true;
@@ -370,25 +404,6 @@ void ReplicationCoordinatorExternalStateImpl::shutdown(OperationContext* opCtx)
// _taskExecutor outside of _threadMutex because once _startedThreads is set to true, the
// _taskExecutor pointer never changes.
_taskExecutor->join();
-
- // Ensure that all writes are visible before reading. If we failed mid-batch, it would be
- // possible to read from a kLastApplied ReadSource where not all writes to the minValid document
- // are visible, generating a writeConflict that would not resolve.
- opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
-
- auto loadLastOpTimeAndWallTimeResult = loadLastOpTimeAndWallTime(opCtx);
- if (_replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx).isNull() &&
- loadLastOpTimeAndWallTimeResult.isOK() &&
- loadLastOpTimeAndWallTimeResult.getValue().opTime ==
- _replicationProcess->getConsistencyMarkers()->getAppliedThrough(opCtx)) {
- // Clear the appliedThrough marker to indicate we are consistent with the top of the
- // oplog. We record this update at the 'lastAppliedOpTime'. If there are any outstanding
- // checkpoints being taken, they should only reflect this write if they see all writes up
- // to our 'lastAppliedOpTime'.
- auto lastAppliedOpTime = repl::ReplicationCoordinator::get(opCtx)->getMyLastAppliedOpTime();
- _replicationProcess->getConsistencyMarkers()->clearAppliedThrough(
- opCtx, lastAppliedOpTime.getTimestamp());
- }
}
executor::TaskExecutor* ReplicationCoordinatorExternalStateImpl::getTaskExecutor() const {
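
The guard in clearAppliedThroughIfCleanShutdown() above can be read as a single "was this a clean shutdown?" predicate. A sketch with hypothetical shorthand names (truncateAfterPoint, lastOpTimeResult, and appliedThrough are not variables in the patch, just stand-ins for the consistency-marker and oplog reads shown above):

    bool cleanShutdown =
        truncateAfterPoint.isNull() &&   // no partially-applied oplog batch left to truncate
        lastOpTimeResult.isOK() &&       // the top of the oplog could be read
        lastOpTimeResult.getValue().opTime == appliedThrough;  // applied through the top of the oplog
    if (cleanShutdown) {
        // Clear appliedThrough at lastAppliedOpTime's timestamp. Safe because the caller holds
        // the RSTL in mode X, so no readers are active at that timestamp.
    }
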
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.h b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
index 4ebbeaa9eb6..b57dab30710 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
@@ -74,6 +74,9 @@ public:
virtual bool isInitialSyncFlagSet(OperationContext* opCtx) override;
virtual void shutdown(OperationContext* opCtx);
+
+ virtual void clearAppliedThroughIfCleanShutdown(OperationContext* opCtx);
+
virtual executor::TaskExecutor* getTaskExecutor() const override;
virtual ThreadPool* getDbWorkThreadPool() const override;
virtual Status initializeReplSetStorage(OperationContext* opCtx, const BSONObj& config);
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
index 053f4d460a8..4f9e195937a 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
@@ -82,6 +82,9 @@ Status ReplicationCoordinatorExternalStateMock::initializeReplSetStorage(Operati
void ReplicationCoordinatorExternalStateMock::shutdown(OperationContext*) {}
+void ReplicationCoordinatorExternalStateMock::clearAppliedThroughIfCleanShutdown(
+ OperationContext*) {}
+
executor::TaskExecutor* ReplicationCoordinatorExternalStateMock::getTaskExecutor() const {
return nullptr;
}
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.h b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
index afcc16ce995..be37389312a 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
@@ -66,6 +66,7 @@ public:
virtual bool isInitialSyncFlagSet(OperationContext* opCtx) override;
virtual void shutdown(OperationContext* opCtx);
+ virtual void clearAppliedThroughIfCleanShutdown(OperationContext* opCtx);
virtual executor::TaskExecutor* getTaskExecutor() const override;
virtual ThreadPool* getDbWorkThreadPool() const override;
virtual Status initializeReplSetStorage(OperationContext* opCtx, const BSONObj& config);
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 9eb3a3848af..1d1fed3e8ff 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -899,6 +899,10 @@ void ReplicationCoordinatorImpl::shutdown(OperationContext* opCtx) {
_replExecutor->join();
}
+void ReplicationCoordinatorImpl::markAsCleanShutdownIfPossible(OperationContext* opCtx) {
+ _externalState->clearAppliedThroughIfCleanShutdown(opCtx);
+}
+
const ReplSettings& ReplicationCoordinatorImpl::getSettings() const {
return _settings;
}
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 5fd6f46dd94..78825755349 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -110,6 +110,8 @@ public:
virtual void shutdown(OperationContext* opCtx) override;
+ void markAsCleanShutdownIfPossible(OperationContext* opCtx) override;
+
virtual const ReplSettings& getSettings() const override;
virtual Mode getReplicationMode() const override;
diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp
index 835029ac9a9..47fdddc82a6 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_mock.cpp
@@ -85,6 +85,10 @@ void ReplicationCoordinatorMock::shutdown(OperationContext*) {
// TODO
}
+void ReplicationCoordinatorMock::markAsCleanShutdownIfPossible(OperationContext*) {
+ // TODO
+}
+
const ReplSettings& ReplicationCoordinatorMock::getSettings() const {
return _settings;
}
diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h
index 2893920d1e5..9daca07d623 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_mock.h
@@ -72,6 +72,8 @@ public:
virtual void shutdown(OperationContext* opCtx);
+ void markAsCleanShutdownIfPossible(OperationContext* opCtx) override;
+
virtual void appendDiagnosticBSON(BSONObjBuilder* bob) override {}
virtual const ReplSettings& getSettings() const;
diff --git a/src/mongo/db/repl/replication_coordinator_noop.cpp b/src/mongo/db/repl/replication_coordinator_noop.cpp
index 6f502574645..ffa6fb8b09b 100644
--- a/src/mongo/db/repl/replication_coordinator_noop.cpp
+++ b/src/mongo/db/repl/replication_coordinator_noop.cpp
@@ -43,6 +43,8 @@ void ReplicationCoordinatorNoOp::enterTerminalShutdown() {}
void ReplicationCoordinatorNoOp::shutdown(OperationContext* opCtx) {}
+void ReplicationCoordinatorNoOp::markAsCleanShutdownIfPossible(OperationContext* opCtx) {}
+
ReplicationCoordinator::Mode ReplicationCoordinatorNoOp::getReplicationMode() const {
return modeReplSet;
}
diff --git a/src/mongo/db/repl/replication_coordinator_noop.h b/src/mongo/db/repl/replication_coordinator_noop.h
index ea825567454..cbc755a8b76 100644
--- a/src/mongo/db/repl/replication_coordinator_noop.h
+++ b/src/mongo/db/repl/replication_coordinator_noop.h
@@ -53,6 +53,8 @@ public:
void shutdown(OperationContext* opCtx) final;
+ void markAsCleanShutdownIfPossible(OperationContext* opCtx) final;
+
ServiceContext* getServiceContext() final {
return _service;
}
diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp
index 05902a7756e..fcacb5041fc 100644
--- a/src/mongo/dbtests/storage_timestamp_tests.cpp
+++ b/src/mongo/dbtests/storage_timestamp_tests.cpp
@@ -1626,7 +1626,7 @@ public:
expectedMinValidWithUnsetFlag.setAppliedThrough(repl::OpTime(presentTs, presentTerm));
assertMinValidDocumentAtTimestamp(nss, nullTs, expectedMinValidWithUnsetFlag);
- assertMinValidDocumentAtTimestamp(nss, pastTs, expectedMinValidWithSetFlag);
+ assertMinValidDocumentAtTimestamp(nss, pastTs, expectedMinValidWithUnsetFlag);
assertMinValidDocumentAtTimestamp(nss, presentTs, expectedMinValidWithUnsetFlag);
assertMinValidDocumentAtTimestamp(nss, futureTs, expectedMinValidWithUnsetFlag);
}
diff --git a/src/mongo/embedded/replication_coordinator_embedded.cpp b/src/mongo/embedded/replication_coordinator_embedded.cpp
index f8010210e61..9bbe51bdc4f 100644
--- a/src/mongo/embedded/replication_coordinator_embedded.cpp
+++ b/src/mongo/embedded/replication_coordinator_embedded.cpp
@@ -53,6 +53,8 @@ void ReplicationCoordinatorEmbedded::enterTerminalShutdown() {}
void ReplicationCoordinatorEmbedded::shutdown(OperationContext* opCtx) {}
+void ReplicationCoordinatorEmbedded::markAsCleanShutdownIfPossible(OperationContext* opCtx) {}
+
const ReplSettings& ReplicationCoordinatorEmbedded::getSettings() const {
static ReplSettings _settings;
return _settings;
diff --git a/src/mongo/embedded/replication_coordinator_embedded.h b/src/mongo/embedded/replication_coordinator_embedded.h
index eac2c5e97ae..7e346575312 100644
--- a/src/mongo/embedded/replication_coordinator_embedded.h
+++ b/src/mongo/embedded/replication_coordinator_embedded.h
@@ -51,6 +51,8 @@ public:
void shutdown(OperationContext* opCtx) override;
+ void markAsCleanShutdownIfPossible(OperationContext* opCtx) override;
+
// Returns the ServiceContext where this instance runs.
ServiceContext* getServiceContext() override {
return _service;