summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Lingzhi Deng <lingzhi.deng@mongodb.com> 2021-01-24 21:54:09 -0500
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-02-22 03:38:51 +0000
commit 85a8c709a0baaffcd60fe3a0c04d5f7621a805a0 (patch)
tree c00732c120a588e823c530243c2738436c64ed70 /src
parent 77606de4b8561600471040d7d3a20b51af2b65d4 (diff)
download mongo-85a8c709a0baaffcd60fe3a0c04d5f7621a805a0.tar.gz
SERVER-53932: Round up multi-key write during recovery of prepared transactions to the stable timestamp
(cherry picked from commit 5d2efc040b405871099fbd2a8547212db3755e02) (cherry picked from commit 5821e5bc1e2e8c4ed3e791a60a104d57f104caf1) (cherry picked from commit 4419a5c2a0d099b40b2155dd4def37802286cb7c)
Diffstat (limited to 'src')
-rw-r--r-- src/mongo/db/catalog/catalog_control_test.cpp 6
-rw-r--r-- src/mongo/db/catalog/index_catalog_entry_impl.cpp 11
-rw-r--r-- src/mongo/db/storage/storage_engine.h 11
-rw-r--r-- src/mongo/db/storage/storage_engine_impl.cpp 8
-rw-r--r-- src/mongo/db/storage/storage_engine_impl.h 4
5 files changed, 39 insertions, 1 deletions
diff --git a/src/mongo/db/catalog/catalog_control_test.cpp b/src/mongo/db/catalog/catalog_control_test.cpp
index da1cd24a8c4..b68d1003baf 100644
--- a/src/mongo/db/catalog/catalog_control_test.cpp
+++ b/src/mongo/db/catalog/catalog_control_test.cpp
@@ -100,9 +100,15 @@ public:
return false;
}
void clearDropPendingState() final {}
+ Timestamp getStableTimestamp() const final {
+ return {};
+ }
virtual Timestamp getInitialDataTimestamp() final {
return {};
}
+ Timestamp getOldestTimestamp() const final {
+ return {};
+ }
Timestamp getAllDurableTimestamp() const final {
return {};
}
diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.cpp b/src/mongo/db/catalog/index_catalog_entry_impl.cpp
index 0537f6565d1..c1e7b90e368 100644
--- a/src/mongo/db/catalog/index_catalog_entry_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_entry_impl.cpp
@@ -323,9 +323,18 @@ void IndexCatalogEntryImpl::setMultikey(OperationContext* opCtx,
// correctness requirement for multikey writes i.e. they must occur at or before the
// first write that set the multikey flag.
auto recoveryPrepareOpTime = txnParticipant.getPrepareOpTimeForRecovery();
+ // We might replay a prepared transaction behind the oldest timestamp during initial
+ // sync or behind the stable timestamp during rollback. During initial sync, we
+ // may not have a stable timestamp. Therefore, we need to round up the multi-key
+ // write timestamp to the max of the prepare, oldest and stable timestamps so that
+ // we don't write behind the oldest/stable timestamp. This code path is only hit
+ // during initial sync/recovery when reconstructing prepared transactions and so we
+ // don't expect the oldest/stable timestamp to advance concurrently.
Timestamp writeTs = recoveryPrepareOpTime.isNull()
? LogicalClock::get(opCtx)->getClusterTime().asTimestamp()
- : recoveryPrepareOpTime.getTimestamp();
+ : std::max({recoveryPrepareOpTime.getTimestamp(),
+ opCtx->getServiceContext()->getStorageEngine()->getOldestTimestamp(),
+ opCtx->getServiceContext()->getStorageEngine()->getStableTimestamp()});
auto status = opCtx->recoveryUnit()->setTimestamp(writeTs);
if (status.code() == ErrorCodes::BadValue) {
diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h
index 5b24a659c31..eb6f0025313 100644
--- a/src/mongo/db/storage/storage_engine.h
+++ b/src/mongo/db/storage/storage_engine.h
@@ -485,6 +485,11 @@ public:
virtual void setStableTimestamp(Timestamp stableTimestamp, bool force = false) {}
/**
+ * Returns the stable timestamp.
+ */
+ virtual Timestamp getStableTimestamp() const = 0;
+
+ /**
* Tells the storage engine the timestamp of the data at startup. This is necessary because
* timestamps are not persisted in the storage layer.
*/
@@ -514,6 +519,12 @@ public:
virtual void setOldestTimestamp(Timestamp timestamp) {}
/**
+ * Returns the oldest timestamp, i.e. the earliest timestamp back to which the storage
+ * engine must maintain snapshot history.
+ */
+ virtual Timestamp getOldestTimestamp() const = 0;
+
+ /**
* Sets a callback which returns the timestamp of the oldest oplog entry involved in an
* active MongoDB transaction. The storage engine calls this function to determine how much
* oplog it must preserve.
diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp
index 8913244e922..ec841dcf01e 100644
--- a/src/mongo/db/storage/storage_engine_impl.cpp
+++ b/src/mongo/db/storage/storage_engine_impl.cpp
@@ -720,6 +720,10 @@ void StorageEngineImpl::setStableTimestamp(Timestamp stableTimestamp, bool force
_engine->setStableTimestamp(stableTimestamp, force);
}
+Timestamp StorageEngineImpl::getStableTimestamp() const {
+ return _engine->getStableTimestamp();
+}
+
void StorageEngineImpl::setInitialDataTimestamp(Timestamp initialDataTimestamp) {
_engine->setInitialDataTimestamp(initialDataTimestamp);
}
@@ -737,6 +741,10 @@ void StorageEngineImpl::setOldestTimestamp(Timestamp newOldestTimestamp) {
_engine->setOldestTimestamp(newOldestTimestamp, force);
}
+Timestamp StorageEngineImpl::getOldestTimestamp() const {
+ return _engine->getOldestTimestamp();  // Forwards the query to _engine unchanged.
+}
+
void StorageEngineImpl::setOldestActiveTransactionTimestampCallback(
StorageEngine::OldestActiveTransactionTimestampCallback callback) {
_engine->setOldestActiveTransactionTimestampCallback(callback);
diff --git a/src/mongo/db/storage/storage_engine_impl.h b/src/mongo/db/storage/storage_engine_impl.h
index 15f18307737..98582bbe837 100644
--- a/src/mongo/db/storage/storage_engine_impl.h
+++ b/src/mongo/db/storage/storage_engine_impl.h
@@ -121,6 +121,8 @@ public:
virtual void setStableTimestamp(Timestamp stableTimestamp, bool force = false) override;
+ virtual Timestamp getStableTimestamp() const override;
+
virtual void setInitialDataTimestamp(Timestamp initialDataTimestamp) override;
virtual Timestamp getInitialDataTimestamp() override;
@@ -129,6 +131,8 @@ public:
virtual void setOldestTimestamp(Timestamp newOldestTimestamp) override;
+ virtual Timestamp getOldestTimestamp() const override;
+
virtual void setOldestActiveTransactionTimestampCallback(
StorageEngine::OldestActiveTransactionTimestampCallback) override;