summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJudah Schvimer <judah@mongodb.com>2020-05-10 17:22:26 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-05-12 12:18:43 +0000
commit1417eee440b4132e24d1388011d681e2c9fcec41 (patch)
tree6851e0d1db2331a8ea7b7a659a53f3389f4ef616
parentf6609ee18cd063170391578a9afaee047eee28a7 (diff)
downloadmongo-1417eee440b4132e24d1388011d681e2c9fcec41.tar.gz
SERVER-48010 Substitute ghost timestamp with no-op write in multi-statement txn multikey sidetxn write
-rw-r--r--src/mongo/db/catalog/index_catalog_entry_impl.cpp60
-rw-r--r--src/mongo/db/op_observer_impl.cpp10
-rw-r--r--src/mongo/dbtests/storage_timestamp_tests.cpp22
3 files changed, 54 insertions, 38 deletions
diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.cpp b/src/mongo/db/catalog/index_catalog_entry_impl.cpp
index f62335fd8b7..5d7454a4b27 100644
--- a/src/mongo/db/catalog/index_catalog_entry_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_entry_impl.cpp
@@ -46,6 +46,7 @@
#include "mongo/db/matcher/expression.h"
#include "mongo/db/matcher/expression_parser.h"
#include "mongo/db/multi_key_path_tracker.h"
+#include "mongo/db/op_observer.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/pipeline/expression_context.h"
#include "mongo/db/query/collation/collator_factory_interface.h"
@@ -271,33 +272,42 @@ Status IndexCatalogEntryImpl::_setMultikeyInMultiDocumentTransaction(
return {ErrorCodes::SnapshotUnavailable, "index not visible in side transaction"};
}
- writeConflictRetry(opCtx, "set index multikey", ns().ns(), [&] {
- WriteUnitOfWork wuow(opCtx);
-
- // If we have a prepare optime for recovery, then we always use that. During recovery of
- // prepared transactions, the logical clock may not yet be initialized, so we use the
- // prepare timestamp of the transaction for this write. This is safe since the prepare
- // timestamp is always <= the commit timestamp of a transaction, which satisfies the
- // correctness requirement for multikey writes i.e. they must occur at or before the
- // first write that set the multikey flag.
- auto recoveryPrepareOpTime = txnParticipant.getPrepareOpTimeForRecovery();
- Timestamp writeTs = recoveryPrepareOpTime.isNull()
- ? LogicalClock::get(opCtx)->getClusterTime().asTimestamp()
- : recoveryPrepareOpTime.getTimestamp();
-
- auto status = opCtx->recoveryUnit()->setTimestamp(writeTs);
- if (status.code() == ErrorCodes::BadValue) {
- LOGV2(20352,
- "Temporarily could not timestamp the multikey catalog write, retrying.",
- "reason"_attr = status.reason());
- throw WriteConflictException();
- }
- fassert(31164, status);
+ writeConflictRetry(
+ opCtx, "set index multikey", ns().ns(), [&] {
+ WriteUnitOfWork wuow(opCtx);
+
+ // If we have a prepare optime for recovery, then we always use that. This is safe since
+ // the prepare timestamp is always <= the commit timestamp of a transaction, which
+ // satisfies the correctness requirement for multikey writes i.e. they must occur at or
+ // before the first write that set the multikey flag. This only occurs when
+ // reconstructing prepared transactions, and not during replication recovery oplog
+ // application.
+ auto recoveryPrepareOpTime = txnParticipant.getPrepareOpTimeForRecovery();
+ if (!recoveryPrepareOpTime.isNull()) {
+ auto status =
+ opCtx->recoveryUnit()->setTimestamp(recoveryPrepareOpTime.getTimestamp());
+ if (status.code() == ErrorCodes::BadValue) {
+ LOGV2(20352,
+ "Temporarily could not timestamp the multikey catalog write, retrying.",
+ "reason"_attr = status.reason());
+ throw WriteConflictException();
+ }
+ fassert(31164, status);
+ } else {
+ // If there is no recovery prepare OpTime, then this node must be a primary. We
+ // write a noop oplog entry to get a properly ordered timestamp.
+ invariant(opCtx->writesAreReplicated());
+
+ auto msg = BSON("msg"
+ << "Setting index to multikey"
+ << "coll" << ns().ns() << "index" << _descriptor->indexName());
+ opCtx->getClient()->getServiceContext()->getOpObserver()->onOpMessage(opCtx, msg);
+ }
- _catalogSetMultikey(opCtx, multikeyPaths);
+ _catalogSetMultikey(opCtx, multikeyPaths);
- wuow.commit();
- });
+ wuow.commit();
+ });
return Status::OK();
}
diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp
index acfad92cd8c..8735e82e287 100644
--- a/src/mongo/db/op_observer_impl.cpp
+++ b/src/mongo/db/op_observer_impl.cpp
@@ -347,12 +347,10 @@ void OpObserverImpl::onStartIndexBuildSinglePhase(OperationContext* opCtx,
// 4. All other cases, we generate a timestamp by writing a no-op oplog entry. This is
// better than using a ghost timestamp. Writing an oplog entry ensures this node is
// primary.
- onInternalOpMessage(
- opCtx,
- {},
- boost::none,
- BSON("msg" << std::string(str::stream() << "Creating indexes. Coll: " << nss)),
- boost::none);
+ auto msg = BSON("msg"
+ << "Creating indexes"
+ << "coll" << nss.ns());
+ onOpMessage(opCtx, msg);
}
void OpObserverImpl::onCommitIndexBuild(OperationContext* opCtx,
diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp
index f79de6a7570..d148f48d416 100644
--- a/src/mongo/dbtests/storage_timestamp_tests.cpp
+++ b/src/mongo/dbtests/storage_timestamp_tests.cpp
@@ -686,7 +686,8 @@ public:
const bool match = (expectedMultikeyPaths == actualMultikeyPaths);
if (!match) {
- FAIL(str::stream() << "Expected: " << dumpMultikeyPaths(expectedMultikeyPaths)
+ FAIL(str::stream() << "TS: " << ts.toString()
+ << ", Expected: " << dumpMultikeyPaths(expectedMultikeyPaths)
<< ", Actual: " << dumpMultikeyPaths(actualMultikeyPaths));
}
ASSERT_TRUE(match);
@@ -1528,11 +1529,14 @@ public:
const auto beforeTxnTime = _clock->reserveTicks(1);
auto beforeTxnTs = beforeTxnTime.asTimestamp();
- auto commitEntryTs = beforeTxnTime.addTicks(1).asTimestamp();
+ const auto multikeyNoopTime = beforeTxnTime.addTicks(1);
+ auto multikeyNoopTs = multikeyNoopTime.asTimestamp();
+ auto commitEntryTs = multikeyNoopTime.addTicks(1).asTimestamp();
- LOGV2(22502, "Present TS: {presentTs}", "presentTs"_attr = presentTs);
- LOGV2(22503, "Before transaction TS: {beforeTxnTs}", "beforeTxnTs"_attr = beforeTxnTs);
- LOGV2(22504, "Commit entry TS: {commitEntryTs}", "commitEntryTs"_attr = commitEntryTs);
+ LOGV2(22502, "Present time", "timestamp"_attr = presentTs);
+ LOGV2(22503, "Before transaction time", "timestamp"_attr = beforeTxnTs);
+ LOGV2(4801000, "Multikey noop time", "timestamp"_attr = multikeyNoopTs);
+ LOGV2(22504, "Commit entry time", "timestamp"_attr = commitEntryTs);
const auto sessionId = makeLogicalSessionIdForTest();
_opCtx->setLogicalSessionId(sessionId);
@@ -1561,8 +1565,9 @@ public:
auto coll = autoColl.getCollection();
// Make sure the transaction committed and its writes were timestamped correctly.
- assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
assertDocumentAtTimestamp(coll, presentTs, BSONObj());
+ assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
+ assertDocumentAtTimestamp(coll, multikeyNoopTs, BSONObj());
assertDocumentAtTimestamp(coll, commitEntryTs, doc);
assertDocumentAtTimestamp(coll, nullTs, doc);
@@ -1575,9 +1580,12 @@ public:
// commit time.
assertMultikeyPaths(_opCtx, coll, indexName, presentTs, false /* shouldBeMultikey */, {{}});
assertMultikeyPaths(
- _opCtx, coll, indexName, beforeTxnTs, true /* shouldBeMultikey */, {{0}});
+ _opCtx, coll, indexName, beforeTxnTs, false /* shouldBeMultikey */, {{}});
+ assertMultikeyPaths(
+ _opCtx, coll, indexName, multikeyNoopTs, true /* shouldBeMultikey */, {{0}});
assertMultikeyPaths(
_opCtx, coll, indexName, commitEntryTs, true /* shouldBeMultikey */, {{0}});
+ assertMultikeyPaths(_opCtx, coll, indexName, nullTs, true /* shouldBeMultikey */, {{0}});
}
};