summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Lingzhi Deng <lingzhi.deng@mongodb.com> 2021-01-24 21:54:09 -0500
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-02-22 03:38:51 +0000
commit 85a8c709a0baaffcd60fe3a0c04d5f7621a805a0 (patch)
tree c00732c120a588e823c530243c2738436c64ed70
parent 77606de4b8561600471040d7d3a20b51af2b65d4 (diff)
download mongo-85a8c709a0baaffcd60fe3a0c04d5f7621a805a0.tar.gz
SERVER-53932: Round up multi-key write during recovery of prepared transactions to the stable timestamp
(cherry picked from commit 5d2efc040b405871099fbd2a8547212db3755e02) (cherry picked from commit 5821e5bc1e2e8c4ed3e791a60a104d57f104caf1) (cherry picked from commit 4419a5c2a0d099b40b2155dd4def37802286cb7c)
-rw-r--r-- etc/backports_required_for_multiversion_tests.yml 2
-rw-r--r-- jstests/replsets/rollback_reconstructs_transactions_prepared_before_stable.js 9
-rw-r--r-- src/mongo/db/catalog/catalog_control_test.cpp 6
-rw-r--r-- src/mongo/db/catalog/index_catalog_entry_impl.cpp 11
-rw-r--r-- src/mongo/db/storage/storage_engine.h 11
-rw-r--r-- src/mongo/db/storage/storage_engine_impl.cpp 8
-rw-r--r-- src/mongo/db/storage/storage_engine_impl.h 4
7 files changed, 50 insertions, 1 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 2328b36d5bc..f493237c0ac 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -96,6 +96,8 @@ all:
test_file: jstests/sharding/sharding_task_executor_pool_matching_policy.js
- ticket: SERVER-53612
test_file: jstests/replsets/election_handoff_not_immediately_electable.js
+ - ticket: SERVER-53932
+ test_file: jstests/replsets/rollback_reconstructs_transactions_prepared_before_stable.js
# Tests that should only be excluded from particular suites should be listed under that suite.
suites:
diff --git a/jstests/replsets/rollback_reconstructs_transactions_prepared_before_stable.js b/jstests/replsets/rollback_reconstructs_transactions_prepared_before_stable.js
index 94bfb9609b4..fb30414b6e3 100644
--- a/jstests/replsets/rollback_reconstructs_transactions_prepared_before_stable.js
+++ b/jstests/replsets/rollback_reconstructs_transactions_prepared_before_stable.js
@@ -21,11 +21,17 @@ let testDB = primary.getDB(dbName);
let testColl = testDB.getCollection(collName);
assert.commandWorked(testDB.runCommand({create: collName}));
+// Set up another collection for multi-key write in transaction.
+const anotherCollName = "anotherColl";
+const anotherColl = testDB.getCollection(anotherCollName);
+assert.commandWorked(anotherColl.createIndex({"$**": 1}));
+
// Start a session on the primary.
let session = primary.startSession();
const sessionID = session.getSessionId();
let sessionDB = session.getDatabase(dbName);
let sessionColl = sessionDB.getCollection(collName);
+const sessionAnotherColl = sessionDB.getCollection(anotherCollName);
assert.commandWorked(sessionColl.insert({_id: 0}));
@@ -33,6 +39,9 @@ assert.commandWorked(sessionColl.insert({_id: 0}));
session.startTransaction();
assert.commandWorked(sessionColl.insert({_id: 1}));
assert.commandWorked(sessionColl.update({_id: 0}, {$set: {a: 1}}));
+// Trigger multi-key writes in the same transaction so that we can also test multi-key writes during
+// recovery of the prepared transaction.
+assert.commandWorked(sessionAnotherColl.insert({a: [1, 2, 3]}));
const prepareTimestamp = PrepareHelpers.prepareTransaction(session);
// Fastcount reflects the insert of a prepared transaction.
diff --git a/src/mongo/db/catalog/catalog_control_test.cpp b/src/mongo/db/catalog/catalog_control_test.cpp
index da1cd24a8c4..b68d1003baf 100644
--- a/src/mongo/db/catalog/catalog_control_test.cpp
+++ b/src/mongo/db/catalog/catalog_control_test.cpp
@@ -100,9 +100,15 @@ public:
return false;
}
void clearDropPendingState() final {}
+ Timestamp getStableTimestamp() const final {
+ return {};
+ }
virtual Timestamp getInitialDataTimestamp() final {
return {};
}
+ Timestamp getOldestTimestamp() const final {
+ return {};
+ }
Timestamp getAllDurableTimestamp() const final {
return {};
}
diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.cpp b/src/mongo/db/catalog/index_catalog_entry_impl.cpp
index 0537f6565d1..c1e7b90e368 100644
--- a/src/mongo/db/catalog/index_catalog_entry_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_entry_impl.cpp
@@ -323,9 +323,18 @@ void IndexCatalogEntryImpl::setMultikey(OperationContext* opCtx,
// correctness requirement for multikey writes i.e. they must occur at or before the
// first write that set the multikey flag.
auto recoveryPrepareOpTime = txnParticipant.getPrepareOpTimeForRecovery();
+ // We might replay a prepared transaction behind the oldest timestamp during initial
+ // sync or behind the stable timestamp during rollback. During initial sync, we
+ // may not have a stable timestamp. Therefore, we need to round up
+ // the multi-key write timestamp to the max of the three so that we don't write
+ // behind the oldest/stable timestamp. This code path is only hit during initial
+ // sync/recovery when reconstructing prepared transactions and so we don't expect
+ // the oldest/stable timestamp to advance concurrently.
Timestamp writeTs = recoveryPrepareOpTime.isNull()
? LogicalClock::get(opCtx)->getClusterTime().asTimestamp()
- : recoveryPrepareOpTime.getTimestamp();
+ : std::max({recoveryPrepareOpTime.getTimestamp(),
+ opCtx->getServiceContext()->getStorageEngine()->getOldestTimestamp(),
+ opCtx->getServiceContext()->getStorageEngine()->getStableTimestamp()});
auto status = opCtx->recoveryUnit()->setTimestamp(writeTs);
if (status.code() == ErrorCodes::BadValue) {
diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h
index 5b24a659c31..eb6f0025313 100644
--- a/src/mongo/db/storage/storage_engine.h
+++ b/src/mongo/db/storage/storage_engine.h
@@ -485,6 +485,11 @@ public:
virtual void setStableTimestamp(Timestamp stableTimestamp, bool force = false) {}
/**
+ * Returns the stable timestamp.
+ */
+ virtual Timestamp getStableTimestamp() const = 0;
+
+ /**
* Tells the storage engine the timestamp of the data at startup. This is necessary because
* timestamps are not persisted in the storage layer.
*/
@@ -514,6 +519,12 @@ public:
virtual void setOldestTimestamp(Timestamp timestamp) {}
/**
+ * Gets the oldest timestamp, i.e. the earliest timestamp for which the storage engine must
+ * maintain snapshot history.
+ */
+ virtual Timestamp getOldestTimestamp() const = 0;
+
+ /**
* Sets a callback which returns the timestamp of the oldest oplog entry involved in an
* active MongoDB transaction. The storage engine calls this function to determine how much
* oplog it must preserve.
diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp
index 8913244e922..ec841dcf01e 100644
--- a/src/mongo/db/storage/storage_engine_impl.cpp
+++ b/src/mongo/db/storage/storage_engine_impl.cpp
@@ -720,6 +720,10 @@ void StorageEngineImpl::setStableTimestamp(Timestamp stableTimestamp, bool force
_engine->setStableTimestamp(stableTimestamp, force);
}
+Timestamp StorageEngineImpl::getStableTimestamp() const {
+ return _engine->getStableTimestamp();
+}
+
void StorageEngineImpl::setInitialDataTimestamp(Timestamp initialDataTimestamp) {
_engine->setInitialDataTimestamp(initialDataTimestamp);
}
@@ -737,6 +741,10 @@ void StorageEngineImpl::setOldestTimestamp(Timestamp newOldestTimestamp) {
_engine->setOldestTimestamp(newOldestTimestamp, force);
}
+Timestamp StorageEngineImpl::getOldestTimestamp() const {
+ return _engine->getOldestTimestamp();  // Forwards the query to _engine unchanged.
+}
+
void StorageEngineImpl::setOldestActiveTransactionTimestampCallback(
StorageEngine::OldestActiveTransactionTimestampCallback callback) {
_engine->setOldestActiveTransactionTimestampCallback(callback);
diff --git a/src/mongo/db/storage/storage_engine_impl.h b/src/mongo/db/storage/storage_engine_impl.h
index 15f18307737..98582bbe837 100644
--- a/src/mongo/db/storage/storage_engine_impl.h
+++ b/src/mongo/db/storage/storage_engine_impl.h
@@ -121,6 +121,8 @@ public:
virtual void setStableTimestamp(Timestamp stableTimestamp, bool force = false) override;
+ virtual Timestamp getStableTimestamp() const override;
+
virtual void setInitialDataTimestamp(Timestamp initialDataTimestamp) override;
virtual Timestamp getInitialDataTimestamp() override;
@@ -129,6 +131,8 @@ public:
virtual void setOldestTimestamp(Timestamp newOldestTimestamp) override;
+ virtual Timestamp getOldestTimestamp() const override;
+
virtual void setOldestActiveTransactionTimestampCallback(
StorageEngine::OldestActiveTransactionTimestampCallback) override;