summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDaniel Gottlieb <daniel.gottlieb@mongodb.com>2022-02-08 10:36:43 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-02-08 16:34:26 +0000
commitdf441f20aab3ec13d30ab157d5761d419edc2051 (patch)
tree2ed95f188156356792878a2397dfb9538be32790 /src
parent1938ca8564fb969f68058d68c55f632eec78a665 (diff)
downloadmongo-df441f20aab3ec13d30ab157d5761d419edc2051.tar.gz
SERVER-63308: Accommodate WT-8601.
Move read timestamp visibility against the oplog into MongoDB. WiredTiger historically loses timestamp information across restarts on logged tables. MDB does rely on read_timestamps for visibility against the oplog in some circumstances. While case analysis supports that the WT bug does not manifest in MDB, WT-8601 fully removes timestamps from `WT_UPDATE`s in logged tables. In MDB, the keys for oplog documents are the object's timestamp. MDB will instead use that value to filter an oplog entry's visibility.
Diffstat (limited to 'src')
-rw-r--r--src/mongo/bson/timestamp.h4
-rw-r--r--src/mongo/db/SConscript1
-rw-r--r--src/mongo/db/repl/SConscript5
-rw-r--r--src/mongo/db/repl/storage_timestamp_test.cpp4111
-rw-r--r--src/mongo/db/service_context_d_test_fixture.cpp33
-rw-r--r--src/mongo/db/service_context_d_test_fixture.h4
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp64
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h10
-rw-r--r--src/mongo/dbtests/SConscript3
-rw-r--r--src/mongo/dbtests/storage_timestamp_tests.cpp4416
10 files changed, 4222 insertions, 4429 deletions
diff --git a/src/mongo/bson/timestamp.h b/src/mongo/bson/timestamp.h
index 579914fe281..3d0befd7450 100644
--- a/src/mongo/bson/timestamp.h
+++ b/src/mongo/bson/timestamp.h
@@ -92,6 +92,10 @@ public:
return static_cast<long long>(asULL());
}
+ std::int64_t asInt64() const {
+ return static_cast<std::int64_t>(asLL());
+ }
+
bool isNull() const {
return secs == 0;
}
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index d56608b9032..49dd3a75584 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -2018,6 +2018,7 @@ env.Library(
'$BUILD_DIR/mongo/db/storage/ephemeral_for_test/storage_ephemeral_for_test',
'$BUILD_DIR/mongo/db/storage/storage_control',
'$BUILD_DIR/mongo/db/storage/storage_options',
+ '$BUILD_DIR/mongo/util/clock_source_mock',
'index_builds_coordinator_mongod',
'service_context_d',
],
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index 6173b799f70..5a1e311c712 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -1643,6 +1643,7 @@ if wiredtiger:
'speculative_majority_read_info_test.cpp',
'split_horizon_test.cpp',
'storage_interface_impl_test.cpp',
+ 'storage_timestamp_test.cpp',
'sync_source_resolver_test.cpp',
'task_runner_test.cpp',
'task_runner_test_fixture.cpp',
@@ -1667,6 +1668,9 @@ if wiredtiger:
'$BUILD_DIR/mongo/db/commands/txn_cmd_request',
'$BUILD_DIR/mongo/db/dbdirectclient',
'$BUILD_DIR/mongo/db/index/index_access_methods',
+ '$BUILD_DIR/mongo/db/index/index_build_interceptor',
+ '$BUILD_DIR/mongo/db/index/skipped_record_tracker',
+ '$BUILD_DIR/mongo/db/index_build_entry_helpers',
'$BUILD_DIR/mongo/db/index_builds_coordinator_mongod',
'$BUILD_DIR/mongo/db/logical_session_id_helpers',
'$BUILD_DIR/mongo/db/logical_time',
@@ -1739,6 +1743,7 @@ if wiredtiger:
'tenant_migration_recipient_service',
'tenant_migration_utils',
'tenant_oplog_processing',
+ 'timestamp_block',
'wait_for_majority_service',
],
)
diff --git a/src/mongo/db/repl/storage_timestamp_test.cpp b/src/mongo/db/repl/storage_timestamp_test.cpp
new file mode 100644
index 00000000000..06037d99109
--- /dev/null
+++ b/src/mongo/db/repl/storage_timestamp_test.cpp
@@ -0,0 +1,4111 @@
+/**
+ * Copyright (C) 2018-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
+
+#include "mongo/platform/basic.h"
+
+#include <cstdint>
+
+#include "mongo/bson/bsonmisc.h"
+#include "mongo/bson/mutable/algorithm.h"
+#include "mongo/bson/simple_bsonobj_comparator.h"
+#include "mongo/bson/timestamp.h"
+#include "mongo/db/catalog/collection.h"
+#include "mongo/db/catalog/collection_catalog.h"
+#include "mongo/db/catalog/create_collection.h"
+#include "mongo/db/catalog/document_validation.h"
+#include "mongo/db/catalog/drop_database.h"
+#include "mongo/db/catalog/drop_indexes.h"
+#include "mongo/db/catalog/index_catalog.h"
+#include "mongo/db/catalog/multi_index_block.h"
+#include "mongo/db/client.h"
+#include "mongo/db/concurrency/write_conflict_exception.h"
+#include "mongo/db/db_raii.h"
+#include "mongo/db/dbdirectclient.h"
+#include "mongo/db/dbhelpers.h"
+#include "mongo/db/global_settings.h"
+#include "mongo/db/index/index_build_interceptor.h"
+#include "mongo/db/index/index_descriptor.h"
+#include "mongo/db/index/wildcard_access_method.h"
+#include "mongo/db/index_build_entry_helpers.h"
+#include "mongo/db/index_builds_coordinator.h"
+#include "mongo/db/multi_key_path_tracker.h"
+#include "mongo/db/op_observer_impl.h"
+#include "mongo/db/op_observer_registry.h"
+#include "mongo/db/query/wildcard_multikey_paths.h"
+#include "mongo/db/repl/apply_ops.h"
+#include "mongo/db/repl/drop_pending_collection_reaper.h"
+#include "mongo/db/repl/multiapplier.h"
+#include "mongo/db/repl/oplog.h"
+#include "mongo/db/repl/oplog_applier.h"
+#include "mongo/db/repl/oplog_applier_impl.h"
+#include "mongo/db/repl/oplog_applier_impl_test_fixture.h"
+#include "mongo/db/repl/oplog_entry.h"
+#include "mongo/db/repl/oplog_entry_test_helpers.h"
+#include "mongo/db/repl/optime.h"
+#include "mongo/db/repl/repl_client_info.h"
+#include "mongo/db/repl/replication_consistency_markers_impl.h"
+#include "mongo/db/repl/replication_consistency_markers_mock.h"
+#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/repl/replication_coordinator_mock.h"
+#include "mongo/db/repl/replication_process.h"
+#include "mongo/db/repl/replication_recovery_mock.h"
+#include "mongo/db/repl/storage_interface_impl.h"
+#include "mongo/db/repl/timestamp_block.h"
+#include "mongo/db/s/collection_sharding_state_factory_shard.h"
+#include "mongo/db/service_context.h"
+#include "mongo/db/service_context_d_test_fixture.h"
+#include "mongo/db/session.h"
+#include "mongo/db/session_catalog_mongod.h"
+#include "mongo/db/storage/snapshot_manager.h"
+#include "mongo/db/storage/storage_engine_impl.h"
+#include "mongo/db/tenant_namespace.h"
+#include "mongo/db/transaction_participant.h"
+#include "mongo/db/transaction_participant_gen.h"
+#include "mongo/db/vector_clock_mutable.h"
+#include "mongo/dbtests/dbtests.h"
+#include "mongo/idl/server_parameter_test_util.h"
+#include "mongo/logv2/log.h"
+#include "mongo/rpc/get_status_from_command_result.h"
+#include "mongo/stdx/future.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/fail_point.h"
+#include "mongo/util/stacktrace.h"
+
+namespace mongo {
+namespace {
+
+// Creates the collection for `ns` if it does not exist, then builds the index described
+// by `spec` on it via MultiIndexBlock. Catalog writes are timestamped at Timestamp(1, 1)
+// unless the recovery unit already carries a commit timestamp. Returns OK when the index
+// already exists; otherwise propagates the first failing status.
+Status createIndexFromSpec(OperationContext* opCtx, StringData ns, const BSONObj& spec) {
+ AutoGetDb autoDb(opCtx, nsToDatabaseSubstring(ns), MODE_X);
+ Collection* coll;
+ {
+ WriteUnitOfWork wunit(opCtx);
+ coll = CollectionCatalog::get(opCtx)->lookupCollectionByNamespaceForMetadataWrite(
+ opCtx, CollectionCatalog::LifetimeMode::kInplace, NamespaceString(ns));
+ if (!coll) {
+ auto db = autoDb.ensureDbExists(opCtx);
+ invariant(db);
+ coll = db->createCollection(opCtx, NamespaceString(ns));
+ }
+ invariant(coll);
+ wunit.commit();
+ }
+ MultiIndexBlock indexer;
+ CollectionWriter collection(coll);
+ // Tear down the in-progress build if any step below fails or returns early.
+ ScopeGuard abortOnExit(
+ [&] { indexer.abortIndexBuild(opCtx, collection, MultiIndexBlock::kNoopOnCleanUpFn); });
+ Status status = indexer
+ .init(opCtx,
+ collection,
+ spec,
+ [opCtx](const std::vector<BSONObj>& specs) -> Status {
+ // Timestamp the catalog write performed by init() unless the
+ // unit of work already has a commit timestamp.
+ if (opCtx->recoveryUnit()->getCommitTimestamp().isNull()) {
+ return opCtx->recoveryUnit()->setTimestamp(Timestamp(1, 1));
+ }
+ return Status::OK();
+ })
+ .getStatus();
+ if (status == ErrorCodes::IndexAlreadyExists) {
+ return Status::OK();
+ }
+ if (!status.isOK()) {
+ return status;
+ }
+ status = indexer.insertAllDocumentsInCollection(opCtx, coll);
+ if (!status.isOK()) {
+ return status;
+ }
+ status = indexer.retrySkippedRecords(opCtx, coll);
+ if (!status.isOK()) {
+ return status;
+ }
+ status = indexer.checkConstraints(opCtx, coll);
+ if (!status.isOK()) {
+ return status;
+ }
+ WriteUnitOfWork wunit(opCtx);
+ ASSERT_OK(indexer.commit(
+ opCtx, coll, MultiIndexBlock::kNoopOnCreateEachFn, MultiIndexBlock::kNoopOnCommitFn));
+ ASSERT_OK(opCtx->recoveryUnit()->setTimestamp(Timestamp(1, 1)));
+ wunit.commit();
+ abortOnExit.dismiss();
+ return Status::OK();
+}
+
+/**
+ * RAII type for operating at a timestamp. Will remove any timestamping when the object destructs.
+ */
+class OneOffRead {
+public:
+ // Abandons the current snapshot and reads at `ts`. A null `ts` selects an
+ // untimestamped read instead of a timestamped one.
+ OneOffRead(OperationContext* opCtx, const Timestamp& ts) : _opCtx(opCtx) {
+ _opCtx->recoveryUnit()->abandonSnapshot();
+ if (ts.isNull()) {
+ _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
+ } else {
+ _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided, ts);
+ }
+ }
+
+ // Restores untimestamped reads on destruction.
+ ~OneOffRead() {
+ _opCtx->recoveryUnit()->abandonSnapshot();
+ _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
+ }
+
+private:
+ OperationContext* _opCtx;
+};
+
+/**
+ * RAII type that makes reads on `opCtx` ignore prepare conflicts for its lifetime,
+ * restoring the default enforcing behavior on destruction.
+ */
+class IgnorePrepareBlock {
+public:
+ IgnorePrepareBlock(OperationContext* opCtx) : _opCtx(opCtx) {
+ _opCtx->recoveryUnit()->abandonSnapshot();
+ _opCtx->recoveryUnit()->setPrepareConflictBehavior(
+ PrepareConflictBehavior::kIgnoreConflicts);
+ }
+
+ ~IgnorePrepareBlock() {
+ _opCtx->recoveryUnit()->abandonSnapshot();
+ _opCtx->recoveryUnit()->setPrepareConflictBehavior(PrepareConflictBehavior::kEnforce);
+ }
+
+private:
+ OperationContext* _opCtx;
+};
+} // namespace
+
+const auto kIndexVersion = IndexDescriptor::IndexVersion::kV2;
+
+// Asserts that `indexName` has no metadata entry in `collMetaData`, i.e. the index does
+// not exist at the timestamp the metadata was read at. Fix: the failure-message typo
+// "Metdata" is corrected to "Metadata".
+void assertIndexMetaDataMissing(std::shared_ptr<BSONCollectionCatalogEntry::MetaData> collMetaData,
+ StringData indexName) {
+ const auto idxOffset = collMetaData->findIndexOffset(indexName);
+ ASSERT_EQUALS(-1, idxOffset) << indexName << ". Collection Metadata: " << collMetaData->toBSON();
+}
+
+// Returns the metadata entry for `indexName` from `collMetaData`, failing the test if
+// the index is not present.
+BSONCollectionCatalogEntry::IndexMetaData getIndexMetaData(
+ std::shared_ptr<BSONCollectionCatalogEntry::MetaData> collMetaData, StringData indexName) {
+ const auto idxOffset = collMetaData->findIndexOffset(indexName);
+ ASSERT_GT(idxOffset, -1) << indexName;
+ return collMetaData->indexes[idxOffset];
+}
+
+// OplogApplier::Observer whose callbacks do nothing; used where an observer is required
+// but batch begin/end events are irrelevant to the test.
+class DoNothingOplogApplierObserver : public repl::OplogApplier::Observer {
+public:
+ void onBatchBegin(const std::vector<repl::OplogEntry>&) final {}
+ void onBatchEnd(const StatusWith<repl::OpTime>&, const std::vector<repl::OplogEntry>&) final {}
+};
+
+class StorageTimestampTest : public ServiceContextMongoDTest {
+public:
+ ServiceContext::UniqueOperationContext _opCtxRaii;
+ OperationContext* _opCtx;
+ VectorClockMutable* _clock;
+
+ // Set up Timestamps in the past, present, and future.
+ LogicalTime _pastLt;
+ Timestamp _pastTs;
+ LogicalTime _presentLt;
+ Timestamp _presentTs;
+ LogicalTime _futureLt;
+ Timestamp _futureTs;
+ Timestamp _nullTs;
+ int _presentTerm;
+ repl::ReplicationCoordinatorMock* _coordinatorMock;
+ repl::ReplicationConsistencyMarkers* _consistencyMarkers;
+
+ StorageTimestampTest()
+ : ServiceContextMongoDTest("wiredTiger",
+ ServiceContextMongoDTest::RepairAction::kNoRepair,
+ kDefaultStorageEngineInitFlags,
+ true, // useReplSettings
+ true // useMockClock
+ ) {
+ // Set up mongod.
+ ServiceContextMongoDTest::setUp();
+
+ _opCtxRaii = cc().makeOperationContext();
+ _opCtx = _opCtxRaii.get();
+ _clock = VectorClockMutable::get(_opCtx);
+
+ // Set up Timestamps in the past, present, and future.
+ _pastLt = _clock->tickClusterTime(1);
+ _pastTs = _pastLt.asTimestamp();
+ _presentLt = _clock->tickClusterTime(1);
+ _presentTs = _presentLt.asTimestamp();
+ // Without the const, the wrong addTicks overload is used.
+ _futureLt = const_cast<const LogicalTime&>(_presentLt).addTicks(1);
+ _futureTs = _futureLt.asTimestamp();
+ _nullTs = Timestamp();
+ _presentTerm = 1;
+
+ auto coordinatorMock = new repl::ReplicationCoordinatorMock(_opCtx->getServiceContext(),
+ getGlobalReplSettings());
+ _coordinatorMock = coordinatorMock;
+ coordinatorMock->alwaysAllowWrites(true);
+ repl::ReplicationCoordinator::set(
+ _opCtx->getServiceContext(),
+ std::unique_ptr<repl::ReplicationCoordinator>(coordinatorMock));
+ repl::StorageInterface::set(_opCtx->getServiceContext(),
+ std::make_unique<repl::StorageInterfaceImpl>());
+
+ auto replicationProcess = new repl::ReplicationProcess(
+ repl::StorageInterface::get(_opCtx->getServiceContext()),
+ std::make_unique<repl::ReplicationConsistencyMarkersMock>(),
+ std::make_unique<repl::ReplicationRecoveryMock>());
+ repl::ReplicationProcess::set(
+ cc().getServiceContext(),
+ std::unique_ptr<repl::ReplicationProcess>(replicationProcess));
+
+ _consistencyMarkers =
+ repl::ReplicationProcess::get(cc().getServiceContext())->getConsistencyMarkers();
+
+ // Since the Client object persists across tests, even though the global
+ // ReplicationCoordinator does not, we need to clear the last op associated with the client
+ // to avoid the invariant in ReplClientInfo::setLastOp that the optime only goes forward.
+ repl::ReplClientInfo::forClient(_opCtx->getClient()).clearLastOp();
+
+ auto registry = std::make_unique<OpObserverRegistry>();
+ registry->addObserver(std::make_unique<OpObserverImpl>());
+ _opCtx->getServiceContext()->setOpObserver(std::move(registry));
+
+ repl::createOplog(_opCtx);
+
+ _clock->tickClusterTimeTo(LogicalTime(Timestamp(1, 0)));
+
+ ASSERT_EQUALS(_presentTs,
+ const_cast<const LogicalTime&>(_pastLt).addTicks(1).asTimestamp());
+ setReplCoordAppliedOpTime(repl::OpTime(_presentTs, _presentTerm));
+
+ {
+ // These tests were developed prior to needing an index build entry collection. Use an
+ // unreplicated write block to avoid changing oplog state due to collection creation.
+ repl::UnreplicatedWritesBlock unreplicated(_opCtx);
+ WriteUnitOfWork wuow(_opCtx);
+ mongo::indexbuildentryhelpers::ensureIndexBuildEntriesNamespaceExists(_opCtx);
+ wuow.commit();
+ }
+ }
+
+ ~StorageTimestampTest() {
+ try {
+ reset(NamespaceString("local.oplog.rs"));
+ } catch (...) {
+ FAIL("Exception while cleaning up test");
+ }
+ }
+
+ void dumpOplog() {
+ OneOffRead oor(_opCtx, Timestamp::min());
+ _opCtx->recoveryUnit()->beginUnitOfWork(_opCtx);
+ LOGV2(8423335, "Dumping oplog collection");
+ AutoGetCollectionForRead oplogRaii(_opCtx, NamespaceString::kRsOplogNamespace);
+ const CollectionPtr& oplogColl = oplogRaii.getCollection();
+ auto oplogRs = oplogColl->getRecordStore();
+ auto oplogCursor = oplogRs->getCursor(_opCtx);
+
+ while (true) {
+ auto doc = oplogCursor->next();
+ if (doc) {
+ LOGV2(8423329, "\tOplog Entry", "oplog"_attr = doc->data.toBson());
+ } else {
+ break;
+ }
+ }
+ _opCtx->recoveryUnit()->abortUnitOfWork();
+ }
+
+ /**
+ * Walking on ice: resetting the ReplicationCoordinator destroys the underlying
+ * `DropPendingCollectionReaper`. Use a truncate/dropAllIndexes to clean out a collection
+ * without actually dropping it.
+ */
+ void reset(NamespaceString nss) const {
+ ::mongo::writeConflictRetry(_opCtx, "deleteAll", nss.ns(), [&] {
+ _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
+ _opCtx->recoveryUnit()->abandonSnapshot();
+ AutoGetCollection collRaii(_opCtx, nss, LockMode::MODE_X);
+
+ if (collRaii) {
+ WriteUnitOfWork wunit(_opCtx);
+ invariant(collRaii.getWritableCollection(_opCtx)->truncate(_opCtx).isOK());
+ if (_opCtx->recoveryUnit()->getCommitTimestamp().isNull()) {
+ ASSERT_OK(_opCtx->recoveryUnit()->setTimestamp(Timestamp(1, 1)));
+ }
+ collRaii.getWritableCollection(_opCtx)->getIndexCatalog()->dropAllIndexes(
+ _opCtx, collRaii.getWritableCollection(_opCtx), false);
+ wunit.commit();
+ return;
+ }
+
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+ auto db = autoColl.ensureDbExists(_opCtx);
+ WriteUnitOfWork wunit(_opCtx);
+ if (_opCtx->recoveryUnit()->getCommitTimestamp().isNull()) {
+ ASSERT_OK(_opCtx->recoveryUnit()->setTimestamp(Timestamp(1, 1)));
+ }
+ invariant(db->createCollection(_opCtx, nss));
+ wunit.commit();
+ });
+ }
+
+ void insertDocument(const CollectionPtr& coll, const InsertStatement& stmt) {
+ // Insert some documents.
+ OpDebug* const nullOpDebug = nullptr;
+ const bool fromMigrate = false;
+ ASSERT_OK(coll->insertDocument(_opCtx, stmt, nullOpDebug, fromMigrate));
+ }
+
+ void createIndex(CollectionWriter& coll, std::string indexName, const BSONObj& indexKey) {
+
+ // Build an index.
+ MultiIndexBlock indexer;
+ ScopeGuard abortOnExit(
+ [&] { indexer.abortIndexBuild(_opCtx, coll, MultiIndexBlock::kNoopOnCleanUpFn); });
+
+ BSONObj indexInfoObj;
+ {
+ auto swIndexInfoObj =
+ indexer.init(_opCtx,
+ coll,
+ {BSON("v" << 2 << "name" << indexName << "key" << indexKey)},
+ MultiIndexBlock::makeTimestampedIndexOnInitFn(_opCtx, coll.get()));
+ ASSERT_OK(swIndexInfoObj.getStatus());
+ indexInfoObj = std::move(swIndexInfoObj.getValue()[0]);
+ }
+
+ ASSERT_OK(indexer.insertAllDocumentsInCollection(_opCtx, coll.get()));
+ ASSERT_OK(indexer.checkConstraints(_opCtx, coll.get()));
+
+ {
+ WriteUnitOfWork wuow(_opCtx);
+ // Timestamping index completion. Primaries write an oplog entry.
+ ASSERT_OK(
+ indexer.commit(_opCtx,
+ coll.getWritableCollection(),
+ [&](const BSONObj& indexSpec) {
+ _opCtx->getServiceContext()->getOpObserver()->onCreateIndex(
+ _opCtx, coll->ns(), coll->uuid(), indexSpec, false);
+ },
+ MultiIndexBlock::kNoopOnCommitFn));
+ // The timestamping repsponsibility is placed on the caller rather than the
+ // MultiIndexBlock.
+ wuow.commit();
+ }
+ abortOnExit.dismiss();
+ }
+
+ std::int32_t itCount(const CollectionPtr& coll) {
+ std::uint64_t ret = 0;
+ auto cursor = coll->getRecordStore()->getCursor(_opCtx);
+ while (cursor->next() != boost::none) {
+ ++ret;
+ }
+
+ return ret;
+ }
+
+ BSONObj findOne(const CollectionPtr& coll) {
+ auto optRecord = coll->getRecordStore()->getCursor(_opCtx)->next();
+ if (optRecord == boost::none) {
+ // Print a stack trace to help disambiguate which `findOne` failed.
+ printStackTrace();
+ FAIL("Did not find any documents.");
+ }
+ return optRecord.get().data.toBson();
+ }
+
+ std::shared_ptr<BSONCollectionCatalogEntry::MetaData> getMetaDataAtTime(
+ DurableCatalog* durableCatalog, RecordId catalogId, const Timestamp& ts) {
+ OneOffRead oor(_opCtx, ts);
+ return durableCatalog->getMetaData(_opCtx, catalogId);
+ }
+
+ StatusWith<BSONObj> doAtomicApplyOps(const std::string& dbName,
+ const std::list<BSONObj>& applyOpsList) {
+ OneOffRead oor(_opCtx, Timestamp::min());
+
+ BSONObjBuilder result;
+ Status status = applyOps(_opCtx,
+ dbName,
+ BSON("applyOps" << applyOpsList),
+ repl::OplogApplication::Mode::kApplyOpsCmd,
+ &result);
+ if (!status.isOK()) {
+ return status;
+ }
+
+ return {result.obj()};
+ }
+
+ // Creates a dummy command operation to persuade `applyOps` to be non-atomic.
+ StatusWith<BSONObj> doNonAtomicApplyOps(const std::string& dbName,
+ const std::list<BSONObj>& applyOpsList) {
+ OneOffRead oor(_opCtx, Timestamp::min());
+
+ BSONObjBuilder result;
+ Status status = applyOps(_opCtx,
+ dbName,
+ BSON("applyOps" << applyOpsList << "allowAtomic" << false),
+ repl::OplogApplication::Mode::kApplyOpsCmd,
+ &result);
+ if (!status.isOK()) {
+ return status;
+ }
+
+ return {result.obj()};
+ }
+
+ BSONObj queryCollection(NamespaceString nss, const BSONObj& query) {
+ BSONObj ret;
+ ASSERT_TRUE(Helpers::findOne(
+ _opCtx, AutoGetCollectionForRead(_opCtx, nss).getCollection(), query, ret))
+ << "Query: " << query;
+ return ret;
+ }
+
+ BSONObj queryOplog(const BSONObj& query) {
+ OneOffRead oor(_opCtx, Timestamp::min());
+ return queryCollection(NamespaceString::kRsOplogNamespace, query);
+ }
+
+ void assertMinValidDocumentAtTimestamp(const NamespaceString& nss,
+ const Timestamp& ts,
+ const repl::MinValidDocument& expectedDoc) {
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+ const CollectionPtr& coll = autoColl.getCollection();
+
+ OneOffRead oor(_opCtx, ts);
+
+ auto doc =
+ repl::MinValidDocument::parse(IDLParserErrorContext("MinValidDocument"), findOne(coll));
+ ASSERT_EQ(expectedDoc.getMinValidTimestamp(), doc.getMinValidTimestamp())
+ << "minValid timestamps weren't equal at " << ts.toString()
+ << ". Expected: " << expectedDoc.toBSON() << ". Found: " << doc.toBSON();
+ ASSERT_EQ(expectedDoc.getMinValidTerm(), doc.getMinValidTerm())
+ << "minValid terms weren't equal at " << ts.toString()
+ << ". Expected: " << expectedDoc.toBSON() << ". Found: " << doc.toBSON();
+ ASSERT_EQ(expectedDoc.getAppliedThrough(), doc.getAppliedThrough())
+ << "appliedThrough OpTimes weren't equal at " << ts.toString()
+ << ". Expected: " << expectedDoc.toBSON() << ". Found: " << doc.toBSON();
+ ASSERT_EQ(expectedDoc.getInitialSyncFlag(), doc.getInitialSyncFlag())
+ << "Initial sync flags weren't equal at " << ts.toString()
+ << ". Expected: " << expectedDoc.toBSON() << ". Found: " << doc.toBSON();
+ }
+
+ void assertDocumentAtTimestamp(const CollectionPtr& coll,
+ const Timestamp& ts,
+ const BSONObj& expectedDoc) {
+ OneOffRead oor(_opCtx, ts);
+ if (expectedDoc.isEmpty()) {
+ ASSERT_EQ(0, itCount(coll))
+ << "Should not find any documents in " << coll->ns() << " at ts: " << ts;
+ } else {
+ ASSERT_EQ(1, itCount(coll))
+ << "Should find one document in " << coll->ns() << " at ts: " << ts;
+ auto doc = findOne(coll);
+ ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(doc, expectedDoc))
+ << "Doc: " << doc.toString() << " Expected: " << expectedDoc.toString();
+ }
+ }
+
+ void assertFilteredDocumentAtTimestamp(const CollectionPtr& coll,
+ const BSONObj& query,
+ const Timestamp& ts,
+ boost::optional<const BSONObj&> expectedDoc) {
+ OneOffRead oor(_opCtx, ts);
+ BSONObj doc;
+ bool found = Helpers::findOne(_opCtx, coll, query, doc);
+ if (!expectedDoc) {
+ ASSERT_FALSE(found) << "Should not find any documents in " << coll->ns() << " matching "
+ << query << " at ts: " << ts;
+ } else {
+ ASSERT(found) << "Should find document in " << coll->ns() << " matching " << query
+ << " at ts: " << ts;
+ ASSERT_BSONOBJ_EQ(doc, *expectedDoc);
+ }
+ }
+
+ void assertOplogDocumentExistsAtTimestamp(const BSONObj& query,
+ const Timestamp& ts,
+ bool exists) {
+ OneOffRead oor(_opCtx, ts);
+ BSONObj ret;
+ bool found = Helpers::findOne(
+ _opCtx,
+ AutoGetCollectionForRead(_opCtx, NamespaceString::kRsOplogNamespace).getCollection(),
+ query,
+ ret);
+ ASSERT_EQ(found, exists) << "Found " << ret << " at " << ts.toBSON();
+ ASSERT_EQ(!ret.isEmpty(), exists) << "Found " << ret << " at " << ts.toBSON();
+ }
+
+ void assertOldestActiveTxnTimestampEquals(const boost::optional<Timestamp>& ts,
+ const Timestamp& atTs) {
+ auto oldest = TransactionParticipant::getOldestActiveTimestamp(atTs);
+ ASSERT_EQ(oldest, ts);
+ }
+
+ void assertHasStartOpTime() {
+ auto txnDoc = _getTxnDoc();
+ ASSERT_TRUE(txnDoc.hasField(SessionTxnRecord::kStartOpTimeFieldName));
+ }
+
+ void assertNoStartOpTime() {
+ auto txnDoc = _getTxnDoc();
+ ASSERT_FALSE(txnDoc.hasField(SessionTxnRecord::kStartOpTimeFieldName));
+ }
+
+ void setReplCoordAppliedOpTime(const repl::OpTime& opTime, Date_t wallTime = Date_t()) {
+ repl::ReplicationCoordinator::get(_opCtx->getServiceContext())
+ ->setMyLastAppliedOpTimeAndWallTime({opTime, wallTime});
+ ASSERT_OK(repl::ReplicationCoordinator::get(_opCtx->getServiceContext())
+ ->updateTerm(_opCtx, opTime.getTerm()));
+ }
+
+ /**
+ * Asserts that the given collection is in (or not in) the DurableCatalog's list of idents at
+ * the
+ * provided timestamp.
+ */
+ void assertNamespaceInIdents(NamespaceString nss, Timestamp ts, bool shouldExpect) {
+ OneOffRead oor(_opCtx, ts);
+ auto durableCatalog = DurableCatalog::get(_opCtx);
+
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
+
+ // getCollectionIdent() returns the ident for the given namespace in the DurableCatalog.
+ // getAllIdents() actually looks in the RecordStore for a list of all idents, and is thus
+ // versioned by timestamp. We can expect a namespace to have a consistent ident across
+ // timestamps, provided the collection does not get renamed.
+ auto expectedIdent =
+ durableCatalog->getEntry(autoColl.getCollection()->getCatalogId()).ident;
+ auto idents = durableCatalog->getAllIdents(_opCtx);
+ auto found = std::find(idents.begin(), idents.end(), expectedIdent);
+
+ if (shouldExpect) {
+ ASSERT(found != idents.end()) << nss.ns() << " was not found at " << ts.toString();
+ } else {
+ ASSERT(found == idents.end()) << nss.ns() << " was found at " << ts.toString()
+ << " when it should not have been.";
+ }
+ }
+
+ /**
+ * Use `ts` = Timestamp::min to observe all indexes.
+ */
+ std::string getNewIndexIdentAtTime(DurableCatalog* durableCatalog,
+ std::vector<std::string>& origIdents,
+ Timestamp ts) {
+ auto ret = getNewIndexIdentsAtTime(durableCatalog, origIdents, ts);
+ ASSERT_EQ(static_cast<std::size_t>(1), ret.size()) << " Num idents: " << ret.size();
+ return ret[0];
+ }
+
+ /**
+ * Use `ts` = Timestamp::min to observe all indexes.
+ */
+ std::vector<std::string> getNewIndexIdentsAtTime(DurableCatalog* durableCatalog,
+ std::vector<std::string>& origIdents,
+ Timestamp ts) {
+ OneOffRead oor(_opCtx, ts);
+
+ // Find the collection and index ident by performing a set difference on the original
+ // idents and the current idents.
+ std::vector<std::string> identsWithColl = durableCatalog->getAllIdents(_opCtx);
+ std::sort(origIdents.begin(), origIdents.end());
+ std::sort(identsWithColl.begin(), identsWithColl.end());
+ std::vector<std::string> idxIdents;
+ std::set_difference(identsWithColl.begin(),
+ identsWithColl.end(),
+ origIdents.begin(),
+ origIdents.end(),
+ std::back_inserter(idxIdents));
+
+ for (const auto& ident : idxIdents) {
+ ASSERT(ident.find("index-") == 0) << "Ident is not an index: " << ident;
+ }
+ return idxIdents;
+ }
+
+ std::string getDroppedIndexIdent(DurableCatalog* durableCatalog,
+ std::vector<std::string>& origIdents) {
+ // Find the collection and index ident by performing a set difference on the original
+ // idents and the current idents.
+ std::vector<std::string> identsWithColl = durableCatalog->getAllIdents(_opCtx);
+ std::sort(origIdents.begin(), origIdents.end());
+ std::sort(identsWithColl.begin(), identsWithColl.end());
+ std::vector<std::string> collAndIdxIdents;
+ std::set_difference(origIdents.begin(),
+ origIdents.end(),
+ identsWithColl.begin(),
+ identsWithColl.end(),
+ std::back_inserter(collAndIdxIdents));
+
+ ASSERT(collAndIdxIdents.size() == 1) << "Num idents: " << collAndIdxIdents.size();
+ return collAndIdxIdents[0];
+ }
+
+ std::vector<std::string> _getIdentDifference(DurableCatalog* durableCatalog,
+ std::vector<std::string>& origIdents) {
+ // Find the ident difference by performing a set difference on the original idents and the
+ // current idents.
+ std::vector<std::string> identsWithColl = durableCatalog->getAllIdents(_opCtx);
+ std::sort(origIdents.begin(), origIdents.end());
+ std::sort(identsWithColl.begin(), identsWithColl.end());
+ std::vector<std::string> collAndIdxIdents;
+ std::set_difference(identsWithColl.begin(),
+ identsWithColl.end(),
+ origIdents.begin(),
+ origIdents.end(),
+ std::back_inserter(collAndIdxIdents));
+ return collAndIdxIdents;
+ }
+ std::tuple<std::string, std::string> getNewCollectionIndexIdent(
+ DurableCatalog* durableCatalog, std::vector<std::string>& origIdents) {
+ // Find the collection and index ident difference.
+ auto collAndIdxIdents = _getIdentDifference(durableCatalog, origIdents);
+
+ ASSERT(collAndIdxIdents.size() == 1 || collAndIdxIdents.size() == 2);
+ if (collAndIdxIdents.size() == 1) {
+ // `system.profile` collections do not have an `_id` index.
+ return std::tie(collAndIdxIdents[0], "");
+ }
+ if (collAndIdxIdents.size() == 2) {
+ // The idents are sorted, so the `collection-...` comes before `index-...`
+ return std::tie(collAndIdxIdents[0], collAndIdxIdents[1]);
+ }
+
+ MONGO_UNREACHABLE;
+ }
+
+ /**
+ * Note: expectedNewIndexIdents should include the _id index.
+ */
+ void assertRenamedCollectionIdentsAtTimestamp(DurableCatalog* durableCatalog,
+ std::vector<std::string>& origIdents,
+ size_t expectedNewIndexIdents,
+ Timestamp timestamp) {
+ OneOffRead oor(_opCtx, timestamp);
+ // Find the collection and index ident difference.
+ auto collAndIdxIdents = _getIdentDifference(durableCatalog, origIdents);
+ size_t newNssIdents, newIdxIdents;
+ newNssIdents = newIdxIdents = 0;
+ for (const auto& ident : collAndIdxIdents) {
+ ASSERT(ident.find("index-") == 0 || ident.find("collection-") == 0)
+ << "Ident is not an index or collection: " << ident;
+ if (ident.find("collection-") == 0) {
+ ASSERT(++newNssIdents == 1) << "Expected new collection idents (1) differ from "
+ "actual new collection idents ("
+ << newNssIdents << ")";
+ } else {
+ newIdxIdents++;
+ }
+ }
+ ASSERT(expectedNewIndexIdents == newIdxIdents)
+ << "Expected new index idents (" << expectedNewIndexIdents
+ << ") differ from actual new index idents (" << newIdxIdents << ")";
+ }
+
+ void assertIdentsExistAtTimestamp(DurableCatalog* durableCatalog,
+ const std::string& collIdent,
+ const std::string& indexIdent,
+ Timestamp timestamp) {
+ OneOffRead oor(_opCtx, timestamp);
+
+ auto allIdents = durableCatalog->getAllIdents(_opCtx);
+ if (collIdent.size() > 0) {
+ // Index build test does not pass in a collection ident.
+ ASSERT(std::find(allIdents.begin(), allIdents.end(), collIdent) != allIdents.end());
+ }
+
+ if (indexIdent.size() > 0) {
+ // `system.profile` does not have an `_id` index.
+ ASSERT(std::find(allIdents.begin(), allIdents.end(), indexIdent) != allIdents.end());
+ }
+ }
+
+ void assertIdentsMissingAtTimestamp(DurableCatalog* durableCatalog,
+ const std::string& collIdent,
+ const std::string& indexIdent,
+ Timestamp timestamp) {
+ OneOffRead oor(_opCtx, timestamp);
+ auto allIdents = durableCatalog->getAllIdents(_opCtx);
+ if (collIdent.size() > 0) {
+ // Index build test does not pass in a collection ident.
+ ASSERT(std::find(allIdents.begin(), allIdents.end(), collIdent) == allIdents.end());
+ }
+
+ ASSERT(std::find(allIdents.begin(), allIdents.end(), indexIdent) == allIdents.end())
+ << "Ident: " << indexIdent << " Timestamp: " << timestamp;
+ }
+
+ // Renders a MultikeyPaths value as nested bracketed lists, e.g. "[ [ 0 ] [ ] ]",
+ // for use in test-failure messages.
+ std::string dumpMultikeyPaths(const MultikeyPaths& multikeyPaths) {
+ std::stringstream ss;
+
+ ss << "[ ";
+ for (const auto& multikeyComponents : multikeyPaths) {
+ ss << "[ ";
+ for (const auto& multikeyComponent : multikeyComponents) {
+ ss << multikeyComponent << " ";
+ }
+ ss << "] ";
+ }
+ ss << "]";
+
+ return ss.str();
+ }
+
+ // Reads the durable catalog at timestamp `ts` and asserts both the index's
+ // multikey flag (`shouldBeMultikey`) and its exact multikey path components
+ // (`expectedMultikeyPaths`). Fails with a dump of expected vs. actual paths on
+ // mismatch.
+ void assertMultikeyPaths(OperationContext* opCtx,
+ const CollectionPtr& collection,
+ StringData indexName,
+ Timestamp ts,
+ bool shouldBeMultikey,
+ const MultikeyPaths& expectedMultikeyPaths) {
+ DurableCatalog* durableCatalog = DurableCatalog::get(opCtx);
+
+ OneOffRead oor(_opCtx, ts);
+
+ MultikeyPaths actualMultikeyPaths;
+ if (!shouldBeMultikey) {
+ ASSERT_FALSE(durableCatalog->isIndexMultikey(
+ opCtx, collection->getCatalogId(), indexName, &actualMultikeyPaths))
+ << "index " << indexName << " should not be multikey at timestamp " << ts;
+ } else {
+ ASSERT(durableCatalog->isIndexMultikey(
+ opCtx, collection->getCatalogId(), indexName, &actualMultikeyPaths))
+ << "index " << indexName << " should be multikey at timestamp " << ts;
+ }
+
+ // `isIndexMultikey` populates `actualMultikeyPaths` regardless of the flag's
+ // value, so the path comparison runs in both branches above.
+ const bool match = (expectedMultikeyPaths == actualMultikeyPaths);
+ if (!match) {
+ FAIL(str::stream() << "TS: " << ts.toString()
+ << ", Expected: " << dumpMultikeyPaths(expectedMultikeyPaths)
+ << ", Actual: " << dumpMultikeyPaths(actualMultikeyPaths));
+ }
+ ASSERT_TRUE(match);
+ }
+
+private:
+ // Fetches this session's entry from the sessions transactions table, keyed by
+ // the operation context's logical session id and the active transaction number
+ // from the TransactionParticipant.
+ BSONObj _getTxnDoc() {
+ auto txnParticipant = TransactionParticipant::get(_opCtx);
+ auto txnsFilter =
+ BSON("_id" << _opCtx->getLogicalSessionId()->toBSON() << "txnNum"
+ << txnParticipant.getActiveTxnNumberAndRetryCounter().getTxnNumber());
+ return queryCollection(NamespaceString::kSessionTransactionsTableNamespace, txnsFilter);
+ }
+};
+
+// Applies timestamped inserts via `applyOps` and verifies that each document is
+// visible exactly when reading at its assigned cluster-time timestamp.
+TEST_F(StorageTimestampTest, SecondaryInsertTimes) {
+ // In order for applyOps to assign timestamps, we must be in non-replicated mode.
+ repl::UnreplicatedWritesBlock uwb(_opCtx);
+
+ // Create a new collection.
+ NamespaceString nss("unittests.timestampedUpdates");
+ reset(nss);
+
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+
+ const std::int32_t docsToInsert = 10;
+ const LogicalTime firstInsertTime = _clock->tickClusterTime(docsToInsert);
+ for (std::int32_t idx = 0; idx < docsToInsert; ++idx) {
+ BSONObjBuilder result;
+ ASSERT_OK(applyOps(
+ _opCtx,
+ nss.db().toString(),
+ BSON("applyOps" << BSON_ARRAY(
+ BSON("ts" << firstInsertTime.addTicks(idx).asTimestamp() << "t" << 1LL << "v"
+ << 2 << "op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid()
+ << "wall" << Date_t() << "o" << BSON("_id" << idx))
+ << BSON("ts" << firstInsertTime.addTicks(idx).asTimestamp() << "t" << 1LL
+ << "op"
+ << "c"
+ << "ns"
+ << "test.$cmd"
+ << "wall" << Date_t() << "o"
+ << BSON("applyOps" << BSONArrayBuilder().arr())))),
+ repl::OplogApplication::Mode::kApplyOpsCmd,
+ &result));
+ }
+
+ // Read back at each insert's timestamp: the latest visible document must be the
+ // one inserted at exactly that timestamp.
+ for (std::int32_t idx = 0; idx < docsToInsert; ++idx) {
+ OneOffRead oor(_opCtx, firstInsertTime.addTicks(idx).asTimestamp());
+
+ BSONObj result;
+ ASSERT(Helpers::getLast(_opCtx, nss.ns().c_str(), result)) << " idx is " << idx;
+ ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(result, BSON("_id" << idx)))
+ << "Doc: " << result.toString() << " Expected: " << BSON("_id" << idx);
+ }
+}
+
+// Applies a batch of insert oplog entries as grouped inserts (the vectored-insert
+// path used by secondary oplog application) and verifies per-document timestamp
+// visibility, mirroring `SecondaryInsertTimes` for the grouped code path.
+TEST_F(StorageTimestampTest, SecondaryArrayInsertTimes) {
+ // In order for oplog application to assign timestamps, we must be in non-replicated mode
+ // and disable document validation.
+ repl::UnreplicatedWritesBlock uwb(_opCtx);
+ DisableDocumentValidation validationDisabler(_opCtx);
+
+ // Create a new collection.
+ NamespaceString nss("unittests.timestampedUpdates");
+ reset(nss);
+
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+
+ const std::int32_t docsToInsert = 10;
+ const LogicalTime firstInsertTime = _clock->tickClusterTime(docsToInsert);
+
+ BSONObjBuilder oplogCommonBuilder;
+ oplogCommonBuilder << "v" << 2 << "op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid() << "wall"
+ << Date_t();
+ auto oplogCommon = oplogCommonBuilder.done();
+
+ std::vector<repl::OplogEntry> oplogEntries;
+ oplogEntries.reserve(docsToInsert);
+ std::vector<const repl::OplogEntry*> opPtrs;
+ BSONObjBuilder oplogEntryBuilders[docsToInsert];
+ for (std::int32_t idx = 0; idx < docsToInsert; ++idx) {
+ auto o = BSON("_id" << idx);
+ // Populate the "ts" field.
+ oplogEntryBuilders[idx] << "ts" << firstInsertTime.addTicks(idx).asTimestamp();
+ // Populate the "t" (term) field.
+ oplogEntryBuilders[idx] << "t" << 1LL;
+ // Populate the "o" field.
+ oplogEntryBuilders[idx] << "o" << o;
+ // Populate the "wall" field
+ oplogEntryBuilders[idx] << "wall" << Date_t();
+ // Populate the other common fields.
+ oplogEntryBuilders[idx].appendElementsUnique(oplogCommon);
+ // Insert ops to be applied.
+ oplogEntries.push_back(repl::OplogEntry(oplogEntryBuilders[idx].done()));
+ opPtrs.push_back(&(oplogEntries.back()));
+ }
+
+ // NOTE: `reserve` above guarantees `oplogEntries` never reallocates, so the
+ // pointers captured in `opPtrs` remain valid across the push_back calls.
+ repl::OplogEntryOrGroupedInserts groupedInserts(opPtrs.cbegin(), opPtrs.cend());
+ const bool dataIsConsistent = true;
+ ASSERT_OK(repl::applyOplogEntryOrGroupedInserts(
+ _opCtx, groupedInserts, repl::OplogApplication::Mode::kSecondary, dataIsConsistent));
+
+ for (std::int32_t idx = 0; idx < docsToInsert; ++idx) {
+ OneOffRead oor(_opCtx, firstInsertTime.addTicks(idx).asTimestamp());
+
+ BSONObj result;
+ ASSERT(Helpers::getLast(_opCtx, nss.ns().c_str(), result)) << " idx is " << idx;
+ ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(result, BSON("_id" << idx)))
+ << "Doc: " << result.toString() << " Expected: " << BSON("_id" << idx);
+ }
+}
+
+
+// Inserts documents at successive timestamps, deletes them one at a time at later
+// timestamps, and verifies that a read at each tick sees exactly the documents
+// not yet deleted as of that timestamp.
+TEST_F(StorageTimestampTest, SecondaryDeleteTimes) {
+ // In order for applyOps to assign timestamps, we must be in non-replicated mode.
+ repl::UnreplicatedWritesBlock uwb(_opCtx);
+
+ // Create a new collection.
+ NamespaceString nss("unittests.timestampedDeletes");
+ reset(nss);
+
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+
+ // Insert some documents.
+ const std::int32_t docsToInsert = 10;
+ const LogicalTime firstInsertTime = _clock->tickClusterTime(docsToInsert);
+ const LogicalTime lastInsertTime = firstInsertTime.addTicks(docsToInsert - 1);
+ WriteUnitOfWork wunit(_opCtx);
+ for (std::int32_t num = 0; num < docsToInsert; ++num) {
+ insertDocument(autoColl.getCollection(),
+ InsertStatement(BSON("_id" << num << "a" << num),
+ firstInsertTime.addTicks(num).asTimestamp(),
+ 0LL));
+ }
+ wunit.commit();
+ ASSERT_EQ(docsToInsert, itCount(autoColl.getCollection()));
+
+ // Delete all documents one at a time.
+ const LogicalTime startDeleteTime = _clock->tickClusterTime(docsToInsert);
+ for (std::int32_t num = 0; num < docsToInsert; ++num) {
+ ASSERT_OK(doNonAtomicApplyOps(
+ nss.db().toString(),
+ {BSON("ts" << startDeleteTime.addTicks(num).asTimestamp() << "t" << 0LL << "v"
+ << 2 << "op"
+ << "d"
+ << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid()
+ << "wall" << Date_t() << "o" << BSON("_id" << num))})
+ .getStatus());
+ }
+
+ // Note the loop bound is inclusive: num == 0 reads at `lastInsertTime` (all
+ // docs present) and num == docsToInsert reads after the final delete (none).
+ for (std::int32_t num = 0; num <= docsToInsert; ++num) {
+ // The first loop queries at `lastInsertTime` and should count all documents. Querying
+ // at each successive tick counts one less document.
+ OneOffRead oor(_opCtx, lastInsertTime.addTicks(num).asTimestamp());
+ ASSERT_EQ(docsToInsert - num, itCount(autoColl.getCollection()));
+ }
+}
+
+// Applies a series of timestamped updates to a single document and verifies that
+// reading at each successive timestamp observes the corresponding intermediate
+// state of the document.
+TEST_F(StorageTimestampTest, SecondaryUpdateTimes) {
+ // In order for applyOps to assign timestamps, we must be in non-replicated mode.
+ repl::UnreplicatedWritesBlock uwb(_opCtx);
+
+ // Create a new collection.
+ NamespaceString nss("unittests.timestampedUpdates");
+ reset(nss);
+
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+
+ // Insert one document that will go through a series of updates.
+ const LogicalTime insertTime = _clock->tickClusterTime(1);
+ WriteUnitOfWork wunit(_opCtx);
+ insertDocument(autoColl.getCollection(),
+ InsertStatement(BSON("_id" << 0), insertTime.asTimestamp(), 0LL));
+ wunit.commit();
+ ASSERT_EQ(1, itCount(autoColl.getCollection()));
+
+ // Each pair in the vector represents the update to perform at the next tick of the
+ // clock. `pair.first` is the update to perform and `pair.second` is the full value of the
+ // document after the transformation.
+ const std::vector<std::pair<BSONObj, BSONObj>> updates = {
+ {BSON("$set" << BSON("val" << 1)), BSON("_id" << 0 << "val" << 1)},
+ {BSON("$unset" << BSON("val" << 1)), BSON("_id" << 0)},
+ {BSON("$addToSet" << BSON("theSet" << 1)), BSON("_id" << 0 << "theSet" << BSON_ARRAY(1))},
+ {BSON("$addToSet" << BSON("theSet" << 2)),
+ BSON("_id" << 0 << "theSet" << BSON_ARRAY(1 << 2))},
+ {BSON("$pull" << BSON("theSet" << 1)), BSON("_id" << 0 << "theSet" << BSON_ARRAY(2))},
+ {BSON("$pull" << BSON("theSet" << 2)), BSON("_id" << 0 << "theSet" << BSONArray())},
+ {BSON("$set" << BSON("theMap.val" << 1)),
+ BSON("_id" << 0 << "theSet" << BSONArray() << "theMap" << BSON("val" << 1))},
+ {BSON("$rename" << BSON("theSet"
+ << "theOtherSet")),
+ BSON("_id" << 0 << "theMap" << BSON("val" << 1) << "theOtherSet" << BSONArray())}};
+
+ const LogicalTime firstUpdateTime = _clock->tickClusterTime(updates.size());
+ for (std::size_t idx = 0; idx < updates.size(); ++idx) {
+ ASSERT_OK(
+ doNonAtomicApplyOps(
+ nss.db().toString(),
+ {BSON("ts" << firstUpdateTime.addTicks(idx).asTimestamp() << "t" << 0LL << "v" << 2
+ << "op"
+ << "u"
+ << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid() << "wall"
+ << Date_t() << "o2" << BSON("_id" << 0) << "o" << updates[idx].first)})
+ .getStatus());
+ }
+
+ for (std::size_t idx = 0; idx < updates.size(); ++idx) {
+ // Querying at each successive ticks after `insertTime` sees the document transform in
+ // the series.
+ OneOffRead oor(_opCtx, insertTime.addTicks(idx + 1).asTimestamp());
+
+ auto doc = findOne(autoColl.getCollection());
+ ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(doc, updates[idx].second))
+ << "Doc: " << doc.toString() << " Expected: " << updates[idx].second.toString();
+ }
+}
+
+// Verifies that a duplicate-key insert applied via non-atomic applyOps is turned
+// into an upsert, and that each of the two writes is visible at its own
+// timestamp.
+TEST_F(StorageTimestampTest, SecondaryInsertToUpsert) {
+ // In order for applyOps to assign timestamps, we must be in non-replicated mode.
+ repl::UnreplicatedWritesBlock uwb(_opCtx);
+
+ // Create a new collection.
+ NamespaceString nss("unittests.insertToUpsert");
+ reset(nss);
+
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+
+ const LogicalTime insertTime = _clock->tickClusterTime(2);
+
+ // This applyOps runs into an insert of `{_id: 0, field: 0}` followed by a second insert
+ // on the same collection with `{_id: 0}`. It's expected for this second insert to be
+ // turned into an upsert. The goal document does not contain `field: 0`.
+ BSONObjBuilder resultBuilder;
+ auto result = unittest::assertGet(doNonAtomicApplyOps(
+ nss.db().toString(),
+ {BSON("ts" << insertTime.asTimestamp() << "t" << 1LL << "op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid() << "wall"
+ << Date_t() << "o" << BSON("_id" << 0 << "field" << 0)),
+ BSON("ts" << insertTime.addTicks(1).asTimestamp() << "t" << 1LL << "op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid() << "wall"
+ << Date_t() << "o" << BSON("_id" << 0))}));
+
+ ASSERT_EQ(2, result.getIntField("applied"));
+ ASSERT(result["results"].Array()[0].Bool());
+ ASSERT(result["results"].Array()[1].Bool());
+
+ // Reading at `insertTime` should show the original document, `{_id: 0, field: 0}`.
+ auto recoveryUnit = _opCtx->recoveryUnit();
+ recoveryUnit->abandonSnapshot();
+ recoveryUnit->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
+ insertTime.asTimestamp());
+ auto doc = findOne(autoColl.getCollection());
+ ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(doc, BSON("_id" << 0 << "field" << 0)))
+ << "Doc: " << doc.toString() << " Expected: {_id: 0, field: 0}";
+
+ // Reading at `insertTime + 1` should show the second insert that got converted to an
+ // upsert, `{_id: 0}`.
+ recoveryUnit->abandonSnapshot();
+ recoveryUnit->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
+ insertTime.addTicks(1).asTimestamp());
+ doc = findOne(autoColl.getCollection());
+ ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(doc, BSON("_id" << 0)))
+ << "Doc: " << doc.toString() << " Expected: {_id: 0}";
+}
+
+// Verifies that an atomic applyOps (no per-op "ts" fields supplied) commits both
+// inserts at a single timestamp after the reserved `preInsertTimestamp`.
+TEST_F(StorageTimestampTest, SecondaryAtomicApplyOps) {
+ // Create a new collection.
+ NamespaceString nss("unittests.insertToUpsert");
+ reset(nss);
+
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+
+ // Reserve a timestamp before the inserts should happen.
+ const LogicalTime preInsertTimestamp = _clock->tickClusterTime(1);
+ auto swResult =
+ doAtomicApplyOps(nss.db().toString(),
+ {BSON("op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid()
+ << "o" << BSON("_id" << 0)),
+ BSON("op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid()
+ << "o" << BSON("_id" << 1))});
+ ASSERT_OK(swResult);
+
+ ASSERT_EQ(2, swResult.getValue().getIntField("applied"));
+ ASSERT(swResult.getValue()["results"].Array()[0].Bool());
+ ASSERT(swResult.getValue()["results"].Array()[1].Bool());
+
+ // Reading at `preInsertTimestamp` should not find anything.
+ auto recoveryUnit = _opCtx->recoveryUnit();
+ recoveryUnit->abandonSnapshot();
+ recoveryUnit->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
+ preInsertTimestamp.asTimestamp());
+ ASSERT_EQ(0, itCount(autoColl.getCollection()))
+ << "Should not observe a write at `preInsertTimestamp`. TS: "
+ << preInsertTimestamp.asTimestamp();
+
+ // Reading at `preInsertTimestamp + 1` should observe both inserts.
+ recoveryUnit = _opCtx->recoveryUnit();
+ recoveryUnit->abandonSnapshot();
+ recoveryUnit->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
+ preInsertTimestamp.addTicks(1).asTimestamp());
+ ASSERT_EQ(2, itCount(autoColl.getCollection()))
+ << "Should observe both writes at `preInsertTimestamp + 1`. TS: "
+ << preInsertTimestamp.addTicks(1).asTimestamp();
+}
+
+// This should have the same result as `SecondaryInsertToUpsert` except it gets there a different
+// way. Doing an atomic `applyOps` should result in a WriteConflictException because the same
+// transaction is trying to modify the same document twice. The `applyOps` command should
+// catch that failure and retry in non-atomic mode, preserving the timestamps supplied by the
+// user.
+TEST_F(StorageTimestampTest, SecondaryAtomicApplyOpsWCEToNonAtomic) {
+ // Create a new collection.
+ // NOTE(review): the "unitteTsts" spelling below is a long-standing typo in this
+ // test's namespace; it is runtime data, so it is preserved as-is.
+ NamespaceString nss("unitteTsts.insertToUpsert");
+ reset(nss);
+
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+
+ const LogicalTime preInsertTimestamp = _clock->tickClusterTime(1);
+ auto swResult =
+ doAtomicApplyOps(nss.db().toString(),
+ {BSON("op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid()
+ << "o" << BSON("_id" << 0 << "field" << 0)),
+ BSON("op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid()
+ << "o" << BSON("_id" << 0))});
+ ASSERT_OK(swResult);
+
+ ASSERT_EQ(2, swResult.getValue().getIntField("applied"));
+ ASSERT(swResult.getValue()["results"].Array()[0].Bool());
+ ASSERT(swResult.getValue()["results"].Array()[1].Bool());
+
+ // Reading at `preInsertTimestamp` should not see any documents.
+ auto recoveryUnit = _opCtx->recoveryUnit();
+ recoveryUnit->abandonSnapshot();
+ recoveryUnit->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
+ preInsertTimestamp.asTimestamp());
+ ASSERT_EQ(0, itCount(autoColl.getCollection()))
+ << "Should not find any documents at `preInsertTimestamp`. TS: "
+ << preInsertTimestamp.asTimestamp();
+
+ // Reading at `preInsertTimestamp + 1` should show the final state of the document.
+ recoveryUnit->abandonSnapshot();
+ recoveryUnit->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
+ preInsertTimestamp.addTicks(1).asTimestamp());
+ auto doc = findOne(autoColl.getCollection());
+ ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(doc, BSON("_id" << 0)))
+ << "Doc: " << doc.toString() << " Expected: {_id: 0}";
+}
+
+// Verifies that a collection created through applyOps at `_presentTs` appears in
+// the durable catalog at that timestamp and later, but not at `_pastTs`.
+TEST_F(StorageTimestampTest, SecondaryCreateCollection) {
+ // In order for applyOps to assign timestamps, we must be in non-replicated mode.
+ repl::UnreplicatedWritesBlock uwb(_opCtx);
+
+ NamespaceString nss("unittests.secondaryCreateCollection");
+ ASSERT_OK(repl::StorageInterface::get(_opCtx)->dropCollection(_opCtx, nss));
+
+ { ASSERT_FALSE(AutoGetCollectionForReadCommand(_opCtx, nss).getCollection()); }
+
+ BSONObjBuilder resultBuilder;
+ auto swResult = doNonAtomicApplyOps(
+ nss.db().toString(),
+ {
+ BSON("ts" << _presentTs << "t" << 1LL << "op"
+ << "c"
+ << "ui" << UUID::gen() << "ns" << nss.getCommandNS().ns() << "wall"
+ << Date_t() << "o" << BSON("create" << nss.coll())),
+ });
+ ASSERT_OK(swResult);
+
+ { ASSERT(AutoGetCollectionForReadCommand(_opCtx, nss).getCollection()); }
+
+ assertNamespaceInIdents(nss, _pastTs, false);
+ assertNamespaceInIdents(nss, _presentTs, true);
+ assertNamespaceInIdents(nss, _futureTs, true);
+ assertNamespaceInIdents(nss, _nullTs, true);
+}
+
+// Creates two collections in one applyOps at different timestamps (`_presentTs`
+// and `_futureTs`) and verifies each becomes visible in the catalog only from its
+// own create timestamp onward.
+TEST_F(StorageTimestampTest, SecondaryCreateTwoCollections) {
+ // In order for applyOps to assign timestamps, we must be in non-replicated mode.
+ repl::UnreplicatedWritesBlock uwb(_opCtx);
+
+ std::string dbName = "unittest";
+ NamespaceString nss1(dbName, "secondaryCreateTwoCollections1");
+ NamespaceString nss2(dbName, "secondaryCreateTwoCollections2");
+ ASSERT_OK(repl::StorageInterface::get(_opCtx)->dropCollection(_opCtx, nss1));
+ ASSERT_OK(repl::StorageInterface::get(_opCtx)->dropCollection(_opCtx, nss2));
+
+ { ASSERT_FALSE(AutoGetCollectionForReadCommand(_opCtx, nss1).getCollection()); }
+ { ASSERT_FALSE(AutoGetCollectionForReadCommand(_opCtx, nss2).getCollection()); }
+
+ // A timestamp strictly after `_futureTs`, used to probe visibility past both creates.
+ const LogicalTime dummyLt = const_cast<const LogicalTime&>(_futureLt).addTicks(1);
+ const Timestamp dummyTs = dummyLt.asTimestamp();
+
+ BSONObjBuilder resultBuilder;
+ auto swResult = doNonAtomicApplyOps(
+ dbName,
+ {
+ BSON("ts" << _presentTs << "t" << 1LL << "op"
+ << "c"
+ << "ui" << UUID::gen() << "ns" << nss1.getCommandNS().ns() << "wall"
+ << Date_t() << "o" << BSON("create" << nss1.coll())),
+ BSON("ts" << _futureTs << "t" << 1LL << "op"
+ << "c"
+ << "ui" << UUID::gen() << "ns" << nss2.getCommandNS().ns() << "wall"
+ << Date_t() << "o" << BSON("create" << nss2.coll())),
+ });
+ ASSERT_OK(swResult);
+
+ { ASSERT(AutoGetCollectionForReadCommand(_opCtx, nss1).getCollection()); }
+ { ASSERT(AutoGetCollectionForReadCommand(_opCtx, nss2).getCollection()); }
+
+ assertNamespaceInIdents(nss1, _pastTs, false);
+ assertNamespaceInIdents(nss1, _presentTs, true);
+ assertNamespaceInIdents(nss1, _futureTs, true);
+ assertNamespaceInIdents(nss1, dummyTs, true);
+ assertNamespaceInIdents(nss1, _nullTs, true);
+
+ assertNamespaceInIdents(nss2, _pastTs, false);
+ assertNamespaceInIdents(nss2, _presentTs, false);
+ assertNamespaceInIdents(nss2, _futureTs, true);
+ assertNamespaceInIdents(nss2, dummyTs, true);
+ assertNamespaceInIdents(nss2, _nullTs, true);
+}
+
+// Applies an insert into an existing collection, a create of a second collection,
+// and an insert into that new collection — all in one applyOps at increasing
+// timestamps — then verifies document and catalog visibility at every timestamp.
+TEST_F(StorageTimestampTest, SecondaryCreateCollectionBetweenInserts) {
+ // In order for applyOps to assign timestamps, we must be in non-replicated mode.
+ repl::UnreplicatedWritesBlock uwb(_opCtx);
+
+ std::string dbName = "unittest";
+ NamespaceString nss1(dbName, "secondaryCreateCollectionBetweenInserts1");
+ NamespaceString nss2(dbName, "secondaryCreateCollectionBetweenInserts2");
+ BSONObj doc1 = BSON("_id" << 1 << "field" << 1);
+ BSONObj doc2 = BSON("_id" << 2 << "field" << 2);
+
+ // The create op must carry the same UUID as the subsequent insert into nss2.
+ const UUID uuid2 = UUID::gen();
+
+ const LogicalTime insert2Lt = const_cast<const LogicalTime&>(_futureLt).addTicks(1);
+ const Timestamp insert2Ts = insert2Lt.asTimestamp();
+
+ const LogicalTime dummyLt = insert2Lt.addTicks(1);
+ const Timestamp dummyTs = dummyLt.asTimestamp();
+
+ {
+ reset(nss1);
+ AutoGetCollection autoColl(_opCtx, nss1, LockMode::MODE_IX);
+
+ ASSERT_OK(repl::StorageInterface::get(_opCtx)->dropCollection(_opCtx, nss2));
+ { ASSERT_FALSE(AutoGetCollectionForReadCommand(_opCtx, nss2).getCollection()); }
+
+ BSONObjBuilder resultBuilder;
+ auto swResult = doNonAtomicApplyOps(
+ dbName,
+ {
+ BSON("ts" << _presentTs << "t" << 1LL << "op"
+ << "i"
+ << "ns" << nss1.ns() << "ui" << autoColl.getCollection()->uuid() << "wall"
+ << Date_t() << "o" << doc1),
+ BSON("ts" << _futureTs << "t" << 1LL << "op"
+ << "c"
+ << "ui" << uuid2 << "ns" << nss2.getCommandNS().ns() << "wall" << Date_t()
+ << "o" << BSON("create" << nss2.coll())),
+ BSON("ts" << insert2Ts << "t" << 1LL << "op"
+ << "i"
+ << "ns" << nss2.ns() << "ui" << uuid2 << "wall" << Date_t() << "o"
+ << doc2),
+ });
+ ASSERT_OK(swResult);
+ }
+
+ {
+ AutoGetCollectionForReadCommand autoColl1(_opCtx, nss1);
+ const auto& coll1 = autoColl1.getCollection();
+ ASSERT(coll1);
+ AutoGetCollectionForReadCommand autoColl2(_opCtx, nss2);
+ const auto& coll2 = autoColl2.getCollection();
+ ASSERT(coll2);
+
+ // nss1's document is visible from `_presentTs` onward.
+ assertDocumentAtTimestamp(coll1, _pastTs, BSONObj());
+ assertDocumentAtTimestamp(coll1, _presentTs, doc1);
+ assertDocumentAtTimestamp(coll1, _futureTs, doc1);
+ assertDocumentAtTimestamp(coll1, insert2Ts, doc1);
+ assertDocumentAtTimestamp(coll1, dummyTs, doc1);
+ assertDocumentAtTimestamp(coll1, _nullTs, doc1);
+
+ // nss2 exists from its create timestamp (`_futureTs`) onward...
+ assertNamespaceInIdents(nss2, _pastTs, false);
+ assertNamespaceInIdents(nss2, _presentTs, false);
+ assertNamespaceInIdents(nss2, _futureTs, true);
+ assertNamespaceInIdents(nss2, insert2Ts, true);
+ assertNamespaceInIdents(nss2, dummyTs, true);
+ assertNamespaceInIdents(nss2, _nullTs, true);
+
+ // ...but its document is only visible from `insert2Ts` onward.
+ assertDocumentAtTimestamp(coll2, _pastTs, BSONObj());
+ assertDocumentAtTimestamp(coll2, _presentTs, BSONObj());
+ assertDocumentAtTimestamp(coll2, _futureTs, BSONObj());
+ assertDocumentAtTimestamp(coll2, insert2Ts, doc2);
+ assertDocumentAtTimestamp(coll2, dummyTs, doc2);
+ assertDocumentAtTimestamp(coll2, _nullTs, doc2);
+ }
+}
+
+// As a primary (replicated writes enabled), applyOps' supplied "ts" is ignored
+// and the create is logged at the next available optime. Verifies the oplog
+// entry's timestamp and the catalog visibility accordingly.
+TEST_F(StorageTimestampTest, PrimaryCreateCollectionInApplyOps) {
+ NamespaceString nss("unittests.primaryCreateCollectionInApplyOps");
+ ASSERT_OK(repl::StorageInterface::get(_opCtx)->dropCollection(_opCtx, nss));
+
+ { ASSERT_FALSE(AutoGetCollectionForReadCommand(_opCtx, nss).getCollection()); }
+
+ BSONObjBuilder resultBuilder;
+ auto swResult = doNonAtomicApplyOps(
+ nss.db().toString(),
+ {
+ BSON("ts" << _presentTs << "t" << 1LL << "op"
+ << "c"
+ << "ui" << UUID::gen() << "ns" << nss.getCommandNS().ns() << "wall"
+ << Date_t() << "o" << BSON("create" << nss.coll())),
+ });
+ ASSERT_OK(swResult);
+
+ { ASSERT(AutoGetCollectionForReadCommand(_opCtx, nss).getCollection()); }
+
+ // Inspect the last oplog entry to confirm the create was logged at `_futureTs`,
+ // not at the `_presentTs` supplied in the applyOps request.
+ BSONObj result;
+ ASSERT(Helpers::getLast(_opCtx, NamespaceString::kRsOplogNamespace.toString().c_str(), result));
+ repl::OplogEntry op(result);
+ ASSERT(op.getOpType() == repl::OpTypeEnum::kCommand) << op.toBSONForLogging();
+ // The next logOp() call will get 'futureTs', which will be the timestamp at which we do
+ // the write. Thus we expect the write to appear at 'futureTs' and not before.
+ ASSERT_EQ(op.getTimestamp(), _futureTs) << op.toBSONForLogging();
+ ASSERT_EQ(op.getNss().ns(), nss.getCommandNS().ns()) << op.toBSONForLogging();
+ ASSERT_BSONOBJ_EQ(op.getObject(), BSON("create" << nss.coll()));
+
+ assertNamespaceInIdents(nss, _pastTs, false);
+ assertNamespaceInIdents(nss, _presentTs, false);
+ assertNamespaceInIdents(nss, _futureTs, true);
+ assertNamespaceInIdents(nss, _nullTs, true);
+}
+
+// Applies inserts through the secondary oplog applier and verifies the index's
+// multikey flag is timestamped: not multikey before the batch, multikey from the
+// first insert's timestamp onward.
+TEST_F(StorageTimestampTest, SecondarySetIndexMultikeyOnInsert) {
+ // Pretend to be a secondary.
+ repl::UnreplicatedWritesBlock uwb(_opCtx);
+
+ NamespaceString nss("unittests.SecondarySetIndexMultikeyOnInsert");
+ reset(nss);
+ UUID uuid = UUID::gen();
+ {
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+ uuid = autoColl.getCollection()->uuid();
+ }
+ auto indexName = "a_1";
+ auto indexSpec = BSON("name" << indexName << "key" << BSON("a" << 1) << "v"
+ << static_cast<int>(kIndexVersion));
+ ASSERT_OK(createIndexFromSpec(_opCtx, nss.ns(), indexSpec));
+
+ _coordinatorMock->alwaysAllowWrites(false);
+
+ const LogicalTime pastTime = _clock->tickClusterTime(1);
+ const LogicalTime insertTime0 = _clock->tickClusterTime(1);
+ const LogicalTime insertTime1 = _clock->tickClusterTime(1);
+ const LogicalTime insertTime2 = _clock->tickClusterTime(1);
+
+ // doc1/doc2 contain arrays under "a", which is what flips the index to multikey.
+ BSONObj doc0 = BSON("_id" << 0 << "a" << 3);
+ BSONObj doc1 = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2));
+ BSONObj doc2 = BSON("_id" << 2 << "a" << BSON_ARRAY(1 << 2));
+ auto op0 = repl::OplogEntry(
+ BSON("ts" << insertTime0.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc0));
+ auto op1 = repl::OplogEntry(
+ BSON("ts" << insertTime1.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc1));
+ auto op2 = repl::OplogEntry(
+ BSON("ts" << insertTime2.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc2));
+ std::vector<repl::OplogEntry> ops = {op0, op1, op2};
+
+ DoNothingOplogApplierObserver observer;
+ auto storageInterface = repl::StorageInterface::get(_opCtx);
+ auto writerPool = repl::makeReplWriterPool();
+ repl::OplogApplierImpl oplogApplier(
+ nullptr, // task executor. not required for applyOplogBatch().
+ nullptr, // oplog buffer. not required for applyOplogBatch().
+ &observer,
+ _coordinatorMock,
+ _consistencyMarkers,
+ storageInterface,
+ repl::OplogApplier::Options(repl::OplogApplication::Mode::kSecondary),
+ writerPool.get());
+ ASSERT_EQUALS(op2.getOpTime(), unittest::assertGet(oplogApplier.applyOplogBatch(_opCtx, ops)));
+
+ // The multikey flag is set at the batch's first optime (insertTime0), so it is
+ // observed at insertTime0 and later, but not at pastTime.
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+ assertMultikeyPaths(
+ _opCtx, autoColl.getCollection(), indexName, pastTime.asTimestamp(), false, {{}});
+ assertMultikeyPaths(
+ _opCtx, autoColl.getCollection(), indexName, insertTime0.asTimestamp(), true, {{0}});
+ assertMultikeyPaths(
+ _opCtx, autoColl.getCollection(), indexName, insertTime1.asTimestamp(), true, {{0}});
+ assertMultikeyPaths(
+ _opCtx, autoColl.getCollection(), indexName, insertTime2.asTimestamp(), true, {{0}});
+}
+
+// Applies out-of-order inserts against a wildcard ($**) index and verifies the
+// multikey path metadata is visible at every timestamp that can contain the
+// multikey-making writes, even when the ops are applied out of order.
+TEST_F(StorageTimestampTest, SecondarySetWildcardIndexMultikeyOnInsert) {
+ // Pretend to be a secondary.
+ repl::UnreplicatedWritesBlock uwb(_opCtx);
+
+ NamespaceString nss("unittests.SecondarySetWildcardIndexMultikeyOnInsert");
+ // Use a capped collection to prevent the batch applier from grouping insert operations
+ // together in the same WUOW. This test attempts to apply operations out of order, but the
+ // storage engine does not allow an operation to set out-of-order timestamps in the same
+ // WUOW.
+ ASSERT_OK(createCollection(
+ _opCtx,
+ nss.db().toString(),
+ BSON("create" << nss.coll() << "capped" << true << "size" << 1 * 1024 * 1024)));
+ auto uuid = [&]() {
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+ return autoColl.getCollection()->uuid();
+ }();
+
+ auto indexName = "a_1";
+ auto indexSpec = BSON("name" << indexName << "key" << BSON("$**" << 1) << "v"
+ << static_cast<int>(kIndexVersion));
+ ASSERT_OK(createIndexFromSpec(_opCtx, nss.ns(), indexSpec));
+
+ _coordinatorMock->alwaysAllowWrites(false);
+
+ const LogicalTime insertTime0 = _clock->tickClusterTime(1);
+ const LogicalTime insertTime1 = _clock->tickClusterTime(1);
+ const LogicalTime insertTime2 = _clock->tickClusterTime(1);
+
+ // doc1/doc2 contain arrays under "a", which makes the wildcard index multikey.
+ BSONObj doc0 = BSON("_id" << 0 << "a" << 3);
+ BSONObj doc1 = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2));
+ BSONObj doc2 = BSON("_id" << 2 << "a" << BSON_ARRAY(1 << 2));
+ auto op0 = repl::OplogEntry(
+ BSON("ts" << insertTime0.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc0));
+ auto op1 = repl::OplogEntry(
+ BSON("ts" << insertTime1.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc1));
+ auto op2 = repl::OplogEntry(
+ BSON("ts" << insertTime2.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc2));
+
+ // Coerce oplog application to apply op2 before op1. This does not guarantee the actual
+ // order of application however, because the oplog applier applies these operations in
+ // parallel across several threads. The test accepts the possibility of a false negative
+ // (test passes when it should fail) in favor of occasionally finding a true positive (test
+ // fails as intended).
+ std::vector<repl::OplogEntry> ops = {op0, op2, op1};
+
+ DoNothingOplogApplierObserver observer;
+ auto storageInterface = repl::StorageInterface::get(_opCtx);
+ auto writerPool = repl::makeReplWriterPool();
+ repl::OplogApplierImpl oplogApplier(
+ nullptr, // task executor. not required for applyOplogBatch().
+ nullptr, // oplog buffer. not required for applyOplogBatch().
+ &observer,
+ _coordinatorMock,
+ _consistencyMarkers,
+ storageInterface,
+ repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering),
+ writerPool.get());
+
+ uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops));
+
+ AutoGetCollectionForRead autoColl(_opCtx, nss);
+ auto wildcardIndexDescriptor =
+ autoColl.getCollection()->getIndexCatalog()->findIndexByName(_opCtx, indexName);
+ const IndexAccessMethod* wildcardIndexAccessMethod = autoColl.getCollection()
+ ->getIndexCatalog()
+ ->getEntry(wildcardIndexDescriptor)
+ ->accessMethod();
+ {
+ // Verify that, even though op2 was applied first, the multikey state is observed in all
+ // WiredTiger transactions that can contain the data written by op1.
+ OneOffRead oor(_opCtx, insertTime1.asTimestamp());
+ const WildcardAccessMethod* wam =
+ dynamic_cast<const WildcardAccessMethod*>(wildcardIndexAccessMethod);
+ MultikeyMetadataAccessStats stats;
+ std::set<FieldRef> paths = getWildcardMultikeyPathSet(wam, _opCtx, &stats);
+ ASSERT_EQUALS(1, paths.size());
+ ASSERT_EQUALS("a", paths.begin()->dottedField());
+ }
+ {
+ // Oplog application conservatively uses the first optime in the batch, insertTime0, as
+ // the point at which the index became multikey, despite the fact that the earliest op
+ // which caused the index to become multikey did not occur until insertTime1. This works
+ // because if we construct a query plan that incorrectly believes a particular path to
+ // be multikey, the plan will still be correct (if possibly sub-optimal). Conversely, if
+ // we were to construct a query plan that incorrectly believes a path is NOT multikey,
+ // it could produce incorrect results.
+ OneOffRead oor(_opCtx, insertTime0.asTimestamp());
+ const WildcardAccessMethod* wam =
+ dynamic_cast<const WildcardAccessMethod*>(wildcardIndexAccessMethod);
+ MultikeyMetadataAccessStats stats;
+ std::set<FieldRef> paths = getWildcardMultikeyPathSet(wam, _opCtx, &stats);
+ ASSERT_EQUALS(1, paths.size());
+ ASSERT_EQUALS("a", paths.begin()->dottedField());
+ }
+}
+
+TEST_F(StorageTimestampTest, SecondarySetWildcardIndexMultikeyOnUpdate) {
+ // Pretend to be a secondary.
+ repl::UnreplicatedWritesBlock uwb(_opCtx);
+
+ NamespaceString nss("unittests.SecondarySetWildcardIndexMultikeyOnUpdate");
+ reset(nss);
+ UUID uuid = UUID::gen();
+ {
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+ uuid = autoColl.getCollection()->uuid();
+ }
+ auto indexName = "a_1";
+ auto indexSpec = BSON("name" << indexName << "key" << BSON("$**" << 1) << "v"
+ << static_cast<int>(kIndexVersion));
+ ASSERT_OK(createIndexFromSpec(_opCtx, nss.ns(), indexSpec));
+
+ _coordinatorMock->alwaysAllowWrites(false);
+
+ const LogicalTime insertTime0 = _clock->tickClusterTime(1);
+ const LogicalTime updateTime1 = _clock->tickClusterTime(1);
+ const LogicalTime updateTime2 = _clock->tickClusterTime(1);
+
+ BSONObj doc0 = BSON("_id" << 0 << "a" << 3);
+ BSONObj doc1 = BSON("$v" << 1 << "$set" << BSON("a" << BSON_ARRAY(1 << 2)));
+ BSONObj doc2 = BSON("$v" << 1 << "$set" << BSON("a" << BSON_ARRAY(1 << 2)));
+ auto op0 = repl::OplogEntry(
+ BSON("ts" << insertTime0.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc0));
+ auto op1 =
+ repl::OplogEntry(BSON("ts" << updateTime1.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
+ << "u"
+ << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o"
+ << doc1 << "o2" << BSON("_id" << 0)));
+ auto op2 =
+ repl::OplogEntry(BSON("ts" << updateTime2.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
+ << "u"
+ << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o"
+ << doc2 << "o2" << BSON("_id" << 0)));
+
+ // Coerce oplog application to apply op2 before op1. This does not guarantee the actual
+ // order of application however, because the oplog applier applies these operations in
+ // parallel across several threads. The test accepts the possibility of a false negative
+ // (test passes when it should fail) in favor of occasionally finding a true positive (test
+ // fails as intended).
+ std::vector<repl::OplogEntry> ops = {op0, op2, op1};
+
+ DoNothingOplogApplierObserver observer;
+ auto storageInterface = repl::StorageInterface::get(_opCtx);
+ auto writerPool = repl::makeReplWriterPool();
+ repl::OplogApplierImpl oplogApplier(
+ nullptr, // task executor. not required for applyOplogBatch().
+ nullptr, // oplog buffer. not required for applyOplogBatch().
+ &observer,
+ _coordinatorMock,
+ _consistencyMarkers,
+ storageInterface,
+ repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering),
+ writerPool.get());
+
+ uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops));
+
+ AutoGetCollectionForRead autoColl(_opCtx, nss);
+ auto wildcardIndexDescriptor =
+ autoColl.getCollection()->getIndexCatalog()->findIndexByName(_opCtx, indexName);
+ const IndexAccessMethod* wildcardIndexAccessMethod = autoColl.getCollection()
+ ->getIndexCatalog()
+ ->getEntry(wildcardIndexDescriptor)
+ ->accessMethod();
+ {
+ // Verify that, even though op2 was applied first, the multikey state is observed in all
+ // WiredTiger transactions that can contain the data written by op1.
+ OneOffRead oor(_opCtx, updateTime1.asTimestamp());
+ const WildcardAccessMethod* wam =
+ dynamic_cast<const WildcardAccessMethod*>(wildcardIndexAccessMethod);
+ MultikeyMetadataAccessStats stats;
+ std::set<FieldRef> paths = getWildcardMultikeyPathSet(wam, _opCtx, &stats);
+ ASSERT_EQUALS(1, paths.size());
+ ASSERT_EQUALS("a", paths.begin()->dottedField());
+ }
+ {
+ // Oplog application conservatively uses the first optime in the batch, insertTime0, as
+ // the point at which the index became multikey, despite the fact that the earliest op
+ // which caused the index to become multikey did not occur until updateTime1. This works
+ // because if we construct a query plan that incorrectly believes a particular path to
+ // be multikey, the plan will still be correct (if possibly sub-optimal). Conversely, if
+ // we were to construct a query plan that incorrectly believes a path is NOT multikey,
+ // it could produce incorrect results.
+ OneOffRead oor(_opCtx, insertTime0.asTimestamp());
+ const WildcardAccessMethod* wam =
+ dynamic_cast<const WildcardAccessMethod*>(wildcardIndexAccessMethod);
+ MultikeyMetadataAccessStats stats;
+ std::set<FieldRef> paths = getWildcardMultikeyPathSet(wam, _opCtx, &stats);
+ ASSERT_EQUALS(1, paths.size());
+ ASSERT_EQUALS("a", paths.begin()->dottedField());
+ }
+}
+
+TEST_F(StorageTimestampTest, InitialSyncSetIndexMultikeyOnInsert) {
+ // Pretend to be a secondary.
+ repl::UnreplicatedWritesBlock uwb(_opCtx);
+
+ NamespaceString nss("unittests.InitialSyncSetIndexMultikeyOnInsert");
+ reset(nss);
+ UUID uuid = UUID::gen();
+ {
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+ uuid = autoColl.getCollection()->uuid();
+ }
+ auto indexName = "a_1";
+ auto indexSpec = BSON("name" << indexName << "key" << BSON("a" << 1) << "v"
+ << static_cast<int>(kIndexVersion));
+ ASSERT_OK(createIndexFromSpec(_opCtx, nss.ns(), indexSpec));
+
+ _coordinatorMock->alwaysAllowWrites(false);
+ ASSERT_OK(_coordinatorMock->setFollowerMode({repl::MemberState::MS::RS_STARTUP2}));
+
+ const LogicalTime pastTime = _clock->tickClusterTime(1);
+ const LogicalTime insertTime0 = _clock->tickClusterTime(1);
+ const LogicalTime indexBuildTime = _clock->tickClusterTime(1);
+ const LogicalTime insertTime1 = _clock->tickClusterTime(1);
+ const LogicalTime insertTime2 = _clock->tickClusterTime(1);
+
+ BSONObj doc0 = BSON("_id" << 0 << "a" << 3);
+ BSONObj doc1 = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2));
+ BSONObj doc2 = BSON("_id" << 2 << "a" << BSON_ARRAY(1 << 2));
+ auto op0 = repl::OplogEntry(
+ BSON("ts" << insertTime0.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc0));
+ auto op1 = repl::OplogEntry(
+ BSON("ts" << insertTime1.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc1));
+ auto op2 = repl::OplogEntry(
+ BSON("ts" << insertTime2.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc2));
+ auto indexSpec2 = BSON("createIndexes" << nss.coll() << "v" << static_cast<int>(kIndexVersion)
+ << "key" << BSON("b" << 1) << "name"
+ << "b_1");
+ auto createIndexOp =
+ repl::OplogEntry(BSON("ts" << indexBuildTime.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
+ << "c"
+ << "ns" << nss.getCommandNS().ns() << "ui" << uuid << "wall"
+ << Date_t() << "o" << indexSpec2));
+
+ // We add in an index creation op to test that we restart tracking multikey path info
+ // after bulk index builds.
+ std::vector<repl::OplogEntry> ops = {op0, createIndexOp, op1, op2};
+
+ DoNothingOplogApplierObserver observer;
+ auto storageInterface = repl::StorageInterface::get(_opCtx);
+ auto writerPool = repl::makeReplWriterPool();
+
+ repl::OplogApplierImpl oplogApplier(
+ nullptr, // task executor. not required for applyOplogBatch().
+ nullptr, // oplog buffer. not required for applyOplogBatch().
+ &observer,
+ _coordinatorMock,
+ _consistencyMarkers,
+ storageInterface,
+ repl::OplogApplier::Options(repl::OplogApplication::Mode::kInitialSync),
+ writerPool.get());
+ auto lastTime = unittest::assertGet(oplogApplier.applyOplogBatch(_opCtx, ops));
+ ASSERT_EQ(lastTime.getTimestamp(), insertTime2.asTimestamp());
+
+ // Wait for the index build to finish before making any assertions.
+ IndexBuildsCoordinator::get(_opCtx)->awaitNoIndexBuildInProgressForCollection(_opCtx, uuid);
+
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+
+ // Ensure minimumVisible has not been updated due to the index creation.
+ ASSERT_LT(autoColl.getCollection()->getMinimumVisibleSnapshot().get(), pastTime.asTimestamp());
+
+ // Reading the multikey state before 'insertTime0' is not valid or reliable to test. If the
+    // background index build intercepts and drains writes during initial sync, the index write
+ // and the write to the multikey path state will not be timestamped. This write is not
+ // timestamped because the lastApplied timestamp, which would normally be used on a primary
+ // or secondary, is not always available during initial sync.
+    // Additionally, it is not valid to read at a timestamp before initial sync completes, so
+ // these assertions below only make sense in the context of this unit test, but would
+ // otherwise not be exercised in any normal scenario.
+ assertMultikeyPaths(
+ _opCtx, autoColl.getCollection(), indexName, insertTime0.asTimestamp(), true, {{0}});
+ assertMultikeyPaths(
+ _opCtx, autoColl.getCollection(), indexName, insertTime1.asTimestamp(), true, {{0}});
+ assertMultikeyPaths(
+ _opCtx, autoColl.getCollection(), indexName, insertTime2.asTimestamp(), true, {{0}});
+}
+
+TEST_F(StorageTimestampTest, PrimarySetIndexMultikeyOnInsert) {
+ NamespaceString nss("unittests.PrimarySetIndexMultikeyOnInsert");
+ reset(nss);
+
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+ auto indexName = "a_1";
+ auto indexSpec = BSON("name" << indexName << "key" << BSON("a" << 1) << "v"
+ << static_cast<int>(kIndexVersion));
+ ASSERT_OK(createIndexFromSpec(_opCtx, nss.ns(), indexSpec));
+
+ const LogicalTime pastTime = _clock->tickClusterTime(1);
+ const LogicalTime insertTime = pastTime.addTicks(1);
+
+ BSONObj doc = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2));
+ WriteUnitOfWork wunit(_opCtx);
+ insertDocument(autoColl.getCollection(), InsertStatement(doc));
+ wunit.commit();
+
+ assertMultikeyPaths(
+ _opCtx, autoColl.getCollection(), indexName, pastTime.asTimestamp(), false, {{}});
+ assertMultikeyPaths(
+ _opCtx, autoColl.getCollection(), indexName, insertTime.asTimestamp(), true, {{0}});
+}
+
+TEST_F(StorageTimestampTest, PrimarySetIndexMultikeyOnInsertUnreplicated) {
+ // Use an unreplicated collection.
+ NamespaceString nss("unittests.system.profile");
+ reset(nss);
+
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+ auto indexName = "a_1";
+ auto indexSpec = BSON("name" << indexName << "key" << BSON("a" << 1) << "v"
+ << static_cast<int>(kIndexVersion));
+ ASSERT_OK(createIndexFromSpec(_opCtx, nss.ns(), indexSpec));
+
+ const LogicalTime pastTime = _clock->tickClusterTime(1);
+ const LogicalTime insertTime = pastTime.addTicks(1);
+
+ BSONObj doc = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2));
+ WriteUnitOfWork wunit(_opCtx);
+ insertDocument(autoColl.getCollection(), InsertStatement(doc));
+ wunit.commit();
+
+ assertMultikeyPaths(
+ _opCtx, autoColl.getCollection(), indexName, pastTime.asTimestamp(), true, {{0}});
+ assertMultikeyPaths(
+ _opCtx, autoColl.getCollection(), indexName, insertTime.asTimestamp(), true, {{0}});
+}
+
+TEST_F(StorageTimestampTest, PrimarySetsMultikeyInsideMultiDocumentTransaction) {
+ auto service = _opCtx->getServiceContext();
+ auto sessionCatalog = SessionCatalog::get(service);
+ sessionCatalog->reset_forTest();
+ MongoDSessionCatalog::onStepUp(_opCtx);
+
+ NamespaceString nss("unittests.PrimarySetsMultikeyInsideMultiDocumentTransaction");
+ reset(nss);
+
+ auto indexName = "a_1";
+ auto indexSpec = BSON("name" << indexName << "ns" << nss.ns() << "key" << BSON("a" << 1) << "v"
+ << static_cast<int>(kIndexVersion));
+ auto doc = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2));
+
+ {
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+ ASSERT_OK(createIndexFromSpec(_opCtx, nss.ns(), indexSpec));
+ }
+
+ const auto currentTime = _clock->getTime();
+ const auto presentTs = currentTime.clusterTime().asTimestamp();
+
+ // This test does not run a real ReplicationCoordinator, so must advance the snapshot
+ // manager manually.
+ auto storageEngine = cc().getServiceContext()->getStorageEngine();
+ storageEngine->getSnapshotManager()->setLastApplied(presentTs);
+
+ const auto beforeTxnTime = _clock->tickClusterTime(1);
+ auto beforeTxnTs = beforeTxnTime.asTimestamp();
+ const auto multikeyNoopTime = beforeTxnTime.addTicks(1);
+ auto multikeyNoopTs = multikeyNoopTime.asTimestamp();
+ auto commitEntryTs = multikeyNoopTime.addTicks(1).asTimestamp();
+
+ LOGV2(22502, "Present time", "timestamp"_attr = presentTs);
+ LOGV2(22503, "Before transaction time", "timestamp"_attr = beforeTxnTs);
+ LOGV2(4801000, "Multikey noop time", "timestamp"_attr = multikeyNoopTs);
+ LOGV2(22504, "Commit entry time", "timestamp"_attr = commitEntryTs);
+
+ const auto sessionId = makeLogicalSessionIdForTest();
+ _opCtx->setLogicalSessionId(sessionId);
+ _opCtx->setTxnNumber(1);
+ _opCtx->setInMultiDocumentTransaction();
+
+ // Check out the session.
+ MongoDOperationContextSession ocs(_opCtx);
+
+ auto txnParticipant = TransactionParticipant::get(_opCtx);
+ ASSERT(txnParticipant);
+
+ txnParticipant.beginOrContinue(
+ _opCtx, {*_opCtx->getTxnNumber()}, false /* autocommit */, true /* startTransaction */);
+ txnParticipant.unstashTransactionResources(_opCtx, "insert");
+ {
+ // Insert a document that will set the index as multikey.
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+ insertDocument(autoColl.getCollection(), InsertStatement(doc));
+ }
+
+ txnParticipant.commitUnpreparedTransaction(_opCtx);
+ txnParticipant.stashTransactionResources(_opCtx);
+
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+ const auto& coll = autoColl.getCollection();
+
+ // Make sure the transaction committed and its writes were timestamped correctly.
+ assertDocumentAtTimestamp(coll, presentTs, BSONObj());
+ assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
+ assertDocumentAtTimestamp(coll, multikeyNoopTs, BSONObj());
+ assertDocumentAtTimestamp(coll, commitEntryTs, doc);
+ assertDocumentAtTimestamp(coll, _nullTs, doc);
+
+ // Make sure the multikey write was timestamped correctly. For correctness, the timestamp of
+ // the write that sets the multikey flag to true should be less than or equal to the first
+ // write that made the index multikey, which, in this case, is the commit timestamp of the
+ // transaction. In other words, it is not incorrect to assign a timestamp that is too early,
+ // but it is incorrect to assign a timestamp that is too late. In this specific case, we
+ // expect the write to be timestamped at the logical clock tick directly preceding the
+ // commit time.
+ assertMultikeyPaths(_opCtx, coll, indexName, presentTs, false /* shouldBeMultikey */, {{}});
+ assertMultikeyPaths(_opCtx, coll, indexName, beforeTxnTs, false /* shouldBeMultikey */, {{}});
+ assertMultikeyPaths(
+ _opCtx, coll, indexName, multikeyNoopTs, true /* shouldBeMultikey */, {{0}});
+ assertMultikeyPaths(_opCtx, coll, indexName, commitEntryTs, true /* shouldBeMultikey */, {{0}});
+ assertMultikeyPaths(_opCtx, coll, indexName, _nullTs, true /* shouldBeMultikey */, {{0}});
+}
+
+TEST_F(StorageTimestampTest, InitializeMinValid) {
+ NamespaceString nss(repl::ReplicationConsistencyMarkersImpl::kDefaultMinValidNamespace);
+ reset(nss);
+
+ repl::ReplicationConsistencyMarkersImpl consistencyMarkers(repl::StorageInterface::get(_opCtx));
+ consistencyMarkers.initializeMinValidDocument(_opCtx);
+
+ repl::MinValidDocument expectedMinValid;
+ expectedMinValid.setMinValidTerm(repl::OpTime::kUninitializedTerm);
+ expectedMinValid.setMinValidTimestamp(_nullTs);
+
+ assertMinValidDocumentAtTimestamp(nss, _nullTs, expectedMinValid);
+ assertMinValidDocumentAtTimestamp(nss, _pastTs, expectedMinValid);
+ assertMinValidDocumentAtTimestamp(nss, _presentTs, expectedMinValid);
+ assertMinValidDocumentAtTimestamp(nss, _futureTs, expectedMinValid);
+}
+
+TEST_F(StorageTimestampTest, SetMinValidInitialSyncFlag) {
+ NamespaceString nss(repl::ReplicationConsistencyMarkersImpl::kDefaultMinValidNamespace);
+ reset(nss);
+
+ repl::ReplicationConsistencyMarkersImpl consistencyMarkers(repl::StorageInterface::get(_opCtx));
+ ASSERT(consistencyMarkers.createInternalCollections(_opCtx).isOK());
+ consistencyMarkers.initializeMinValidDocument(_opCtx);
+ consistencyMarkers.setInitialSyncFlag(_opCtx);
+
+ repl::MinValidDocument expectedMinValidWithSetFlag;
+ expectedMinValidWithSetFlag.setMinValidTerm(repl::OpTime::kUninitializedTerm);
+ expectedMinValidWithSetFlag.setMinValidTimestamp(_nullTs);
+ expectedMinValidWithSetFlag.setInitialSyncFlag(true);
+
+ assertMinValidDocumentAtTimestamp(nss, _nullTs, expectedMinValidWithSetFlag);
+
+ consistencyMarkers.clearInitialSyncFlag(_opCtx);
+
+ repl::MinValidDocument expectedMinValidWithUnsetFlag;
+ expectedMinValidWithSetFlag.setMinValidTerm(repl::OpTime::kUninitializedTerm);
+ expectedMinValidWithSetFlag.setMinValidTimestamp(_nullTs);
+
+ assertMinValidDocumentAtTimestamp(nss, _nullTs, expectedMinValidWithUnsetFlag);
+}
+
+TEST_F(StorageTimestampTest, SetMinValidAppliedThrough) {
+ NamespaceString nss(repl::ReplicationConsistencyMarkersImpl::kDefaultMinValidNamespace);
+ reset(nss);
+
+ repl::ReplicationConsistencyMarkersImpl consistencyMarkers(repl::StorageInterface::get(_opCtx));
+ consistencyMarkers.initializeMinValidDocument(_opCtx);
+
+ repl::MinValidDocument expectedMinValidInit;
+ expectedMinValidInit.setMinValidTerm(repl::OpTime::kUninitializedTerm);
+ expectedMinValidInit.setMinValidTimestamp(_nullTs);
+ assertMinValidDocumentAtTimestamp(nss, _nullTs, expectedMinValidInit);
+
+ consistencyMarkers.setAppliedThrough(_opCtx, repl::OpTime(_presentTs, _presentTerm));
+
+ repl::MinValidDocument expectedMinValidPresent;
+ expectedMinValidPresent.setMinValidTerm(repl::OpTime::kUninitializedTerm);
+ expectedMinValidPresent.setMinValidTimestamp(_nullTs);
+ expectedMinValidPresent.setAppliedThrough(repl::OpTime(_presentTs, _presentTerm));
+
+ assertMinValidDocumentAtTimestamp(nss, _nullTs, expectedMinValidPresent);
+
+ // appliedThrough opTime can be unset.
+ consistencyMarkers.clearAppliedThrough(_opCtx);
+ assertMinValidDocumentAtTimestamp(nss, _nullTs, expectedMinValidInit);
+}
+
+/**
+ * This KVDropDatabase test only exists in this file for historical reasons, the final phase of
+ * timestamping `dropDatabase` side-effects no longer applies. The purpose of this test is to
+ * exercise the `StorageEngine::dropDatabase` method.
+ */
+class KVDropDatabase : public StorageTimestampTest {
+private:
+ void _doTest() {
+ // Not actually called.
+ }
+
+public:
+ void run(bool simulatePrimary) {
+ auto storageInterface = repl::StorageInterface::get(_opCtx);
+ repl::DropPendingCollectionReaper::set(
+ _opCtx->getServiceContext(),
+ std::make_unique<repl::DropPendingCollectionReaper>(storageInterface));
+
+ auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
+ auto durableCatalog = storageEngine->getCatalog();
+
+ // Declare the database to be in a "synced" state, i.e: in steady-state replication.
+ Timestamp syncTime = _clock->tickClusterTime(1).asTimestamp();
+ invariant(!syncTime.isNull());
+ storageEngine->setInitialDataTimestamp(syncTime);
+
+ // This test drops collections piece-wise instead of having the "drop database" algorithm
+ // perform this walk. Defensively operate on a separate DB from the other tests to ensure
+ // no leftover collections carry-over.
+ const NamespaceString nss("unittestsDropDB.kvDropDatabase");
+ const NamespaceString sysProfile("unittestsDropDB.system.profile");
+
+ std::string collIdent;
+ std::string indexIdent;
+ std::string sysProfileIdent;
+ // `*.system.profile` does not have an `_id` index. Just create it to abide by the API. This
+ // value will be the empty string. Helper methods accommodate this.
+ std::string sysProfileIndexIdent;
+ for (auto& tuple : {std::tie(nss, collIdent, indexIdent),
+ std::tie(sysProfile, sysProfileIdent, sysProfileIndexIdent)}) {
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
+
+ // Save the pre-state idents so we can capture the specific idents related to collection
+ // creation.
+ std::vector<std::string> origIdents = durableCatalog->getAllIdents(_opCtx);
+ const auto& nss = std::get<0>(tuple);
+
+ // Non-replicated namespaces are wrapped in an unreplicated writes block. This has the
+ // side-effect of not timestamping the collection creation.
+ repl::UnreplicatedWritesBlock notReplicated(_opCtx);
+ if (nss.isReplicated()) {
+ TimestampBlock tsBlock(_opCtx, _clock->tickClusterTime(1).asTimestamp());
+ reset(nss);
+ } else {
+ reset(nss);
+ }
+
+ // Bind the local values to the variables in the parent scope.
+ auto& collIdent = std::get<1>(tuple);
+ auto& indexIdent = std::get<2>(tuple);
+ std::tie(collIdent, indexIdent) =
+ getNewCollectionIndexIdent(durableCatalog, origIdents);
+ }
+
+ AutoGetCollection coll(_opCtx, nss, LockMode::MODE_X);
+ {
+ // Drop/rename `kvDropDatabase`. `system.profile` does not get dropped/renamed.
+ WriteUnitOfWork wuow(_opCtx);
+ Database* db = coll.getDb();
+ ASSERT_OK(db->dropCollection(_opCtx, nss));
+ wuow.commit();
+ }
+
+ // Reserve a tick, this represents a time after the rename in which the `kvDropDatabase`
+ // ident for `kvDropDatabase` still exists.
+ const Timestamp postRenameTime = _clock->tickClusterTime(1).asTimestamp();
+
+ // If the storage engine is managing drops internally, the ident should not be visible after
+ // a drop.
+ if (storageEngine->supportsPendingDrops()) {
+ assertIdentsMissingAtTimestamp(durableCatalog, collIdent, indexIdent, postRenameTime);
+ } else {
+ // The namespace has changed, but the ident still exists as-is after the rename.
+ assertIdentsExistAtTimestamp(durableCatalog, collIdent, indexIdent, postRenameTime);
+ }
+
+ const Timestamp dropTime = _clock->tickClusterTime(1).asTimestamp();
+ if (simulatePrimary) {
+ ASSERT_OK(dropDatabaseForApplyOps(_opCtx, nss.db().toString()));
+ } else {
+ repl::UnreplicatedWritesBlock uwb(_opCtx);
+ TimestampBlock ts(_opCtx, dropTime);
+ ASSERT_OK(dropDatabaseForApplyOps(_opCtx, nss.db().toString()));
+ }
+
+ // Assert that the idents do not exist.
+ assertIdentsMissingAtTimestamp(
+ durableCatalog, sysProfileIdent, sysProfileIndexIdent, Timestamp::max());
+ assertIdentsMissingAtTimestamp(durableCatalog, collIdent, indexIdent, Timestamp::max());
+
+ // dropDatabase must not timestamp the final write. The collection and index should seem
+ // to have never existed.
+ assertIdentsMissingAtTimestamp(durableCatalog, collIdent, indexIdent, syncTime);
+
+ // Reset initial data timestamp to avoid unintended storage engine timestamp side effects.
+ storageEngine->setInitialDataTimestamp(Timestamp(0, 0));
+ }
+};
+
+TEST(StorageTimestampTest, KVDropDatabasePrimary) {
+ KVDropDatabase test;
+ test.run(true);
+ test.run(false);
+}
+
+/**
+ * This test asserts that the catalog updates that represent the beginning and end of an index
+ * build are timestamped. Additionally, the index will be `multikey` and that catalog update that
+ * finishes the index build will also observe the index is multikey.
+ *
+ * Primaries log no-ops when starting an index build to acquire a timestamp. A primary committing
+ * an index build gets timestamped when the `createIndexes` command creates an oplog entry. That
+ * step is mimicked here.
+ *
+ * Secondaries timestamp starting their index build by being in a `TimestampBlock` when the oplog
+ * entry is processed. Secondaries will look at the logical clock when completing the index
+ * build. This is safe so long as completion is not racing with secondary oplog application (i.e:
+ * enforced via the parallel batch writer mode lock).
+ */
+class TimestampIndexBuilds : public StorageTimestampTest {
+private:
+ void _doTest() {
+ // Not actually called.
+ }
+
+public:
+ void run(bool simulatePrimary) {
+ const bool simulateSecondary = !simulatePrimary;
+ if (simulateSecondary) {
+ // The MemberState is inspected during index builds to use a "ghost" write to timestamp
+ // index completion.
+ ASSERT_OK(_coordinatorMock->setFollowerMode({repl::MemberState::MS::RS_SECONDARY}));
+ }
+
+ auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
+ auto durableCatalog = storageEngine->getCatalog();
+
+ NamespaceString nss("unittests.timestampIndexBuilds");
+ reset(nss);
+
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
+ CollectionWriter coll(_opCtx, autoColl);
+
+ RecordId catalogId = autoColl.getCollection()->getCatalogId();
+
+ const LogicalTime insertTimestamp = _clock->tickClusterTime(1);
+ {
+ WriteUnitOfWork wuow(_opCtx);
+ insertDocument(coll.get(),
+ InsertStatement(BSON("_id" << 0 << "a" << BSON_ARRAY(1 << 2)),
+ insertTimestamp.asTimestamp(),
+ _presentTerm));
+ wuow.commit();
+ ASSERT_EQ(1, itCount(autoColl.getCollection()));
+ }
+
+ // Save the pre-state idents so we can capture the specific ident related to index
+ // creation.
+ std::vector<std::string> origIdents = durableCatalog->getAllIdents(_opCtx);
+
+ // Build an index on `{a: 1}`. This index will be multikey.
+ MultiIndexBlock indexer;
+ ScopeGuard abortOnExit(
+ [&] { indexer.abortIndexBuild(_opCtx, coll, MultiIndexBlock::kNoopOnCleanUpFn); });
+ const LogicalTime beforeIndexBuild = _clock->tickClusterTime(2);
+ BSONObj indexInfoObj;
+ {
+ // Primaries do not have a wrapping `TimestampBlock`; secondaries do.
+ const Timestamp commitTimestamp =
+ simulatePrimary ? Timestamp::min() : beforeIndexBuild.addTicks(1).asTimestamp();
+ TimestampBlock tsBlock(_opCtx, commitTimestamp);
+
+ // Secondaries will also be in an `UnreplicatedWritesBlock` that prevents the `logOp`
+            // from creating an entry.
+ boost::optional<repl::UnreplicatedWritesBlock> unreplicated;
+ if (simulateSecondary) {
+ unreplicated.emplace(_opCtx);
+ }
+
+ auto swIndexInfoObj = indexer.init(
+ _opCtx,
+ coll,
+ {BSON("v" << 2 << "unique" << true << "name"
+ << "a_1"
+ << "key" << BSON("a" << 1))},
+ MultiIndexBlock::makeTimestampedIndexOnInitFn(_opCtx, autoColl.getCollection()));
+ ASSERT_OK(swIndexInfoObj.getStatus());
+ indexInfoObj = std::move(swIndexInfoObj.getValue()[0]);
+ }
+
+ const LogicalTime afterIndexInit = _clock->tickClusterTime(2);
+
+ // Inserting all the documents has the side-effect of setting internal state on the index
+ // builder that the index is multikey.
+ ASSERT_OK(indexer.insertAllDocumentsInCollection(_opCtx, autoColl.getCollection()));
+ ASSERT_OK(indexer.checkConstraints(_opCtx, autoColl.getCollection()));
+
+ {
+ WriteUnitOfWork wuow(_opCtx);
+            // All callers of `MultiIndexBlock::commit` are responsible for timestamping index
+            // completion. Primaries write an oplog entry. Secondaries explicitly set a
+ // timestamp.
+ ASSERT_OK(
+ indexer.commit(_opCtx,
+ autoColl.getWritableCollection(_opCtx),
+ [&](const BSONObj& indexSpec) {
+ if (simulatePrimary) {
+ // The timestamping responsibility for each index is placed
+ // on the caller.
+ _opCtx->getServiceContext()->getOpObserver()->onCreateIndex(
+ _opCtx, nss, coll->uuid(), indexSpec, false);
+ } else {
+ const auto currentTime = _clock->getTime();
+ ASSERT_OK(_opCtx->recoveryUnit()->setTimestamp(
+ currentTime.clusterTime().asTimestamp()));
+ }
+ },
+ MultiIndexBlock::kNoopOnCommitFn));
+ wuow.commit();
+ }
+ abortOnExit.dismiss();
+
+ const Timestamp afterIndexBuild = _clock->tickClusterTime(1).asTimestamp();
+
+ const std::string indexIdent =
+ getNewIndexIdentAtTime(durableCatalog, origIdents, Timestamp::min());
+ assertIdentsMissingAtTimestamp(
+ durableCatalog, "", indexIdent, beforeIndexBuild.asTimestamp());
+
+ // Assert that the index entry exists after init and `ready: false`.
+ assertIdentsExistAtTimestamp(durableCatalog, "", indexIdent, afterIndexInit.asTimestamp());
+ {
+ ASSERT_FALSE(
+ getIndexMetaData(
+ getMetaDataAtTime(durableCatalog, catalogId, afterIndexInit.asTimestamp()),
+ "a_1")
+ .ready);
+ }
+
+ // After the build completes, assert that the index is `ready: true` and multikey.
+ assertIdentsExistAtTimestamp(durableCatalog, "", indexIdent, afterIndexBuild);
+ {
+ auto indexMetaData = getIndexMetaData(
+ getMetaDataAtTime(durableCatalog, catalogId, afterIndexBuild), "a_1");
+ ASSERT(indexMetaData.ready);
+ ASSERT(indexMetaData.multikey);
+
+ ASSERT_EQ(std::size_t(1), indexMetaData.multikeyPaths.size());
+ const bool match = indexMetaData.multikeyPaths[0] == MultikeyComponents({0});
+ if (!match) {
+ FAIL(str::stream() << "Expected: [ [ 0 ] ] Actual: "
+ << dumpMultikeyPaths(indexMetaData.multikeyPaths));
+ }
+ }
+ }
+};
+
+TEST(StorageTimestampTest, TimestampIndexBuilds) {
+ TimestampIndexBuilds test;
+ test.run(false);
+ test.run(true);
+}
+
+TEST_F(StorageTimestampTest, TimestampMultiIndexBuilds) {
+ auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
+ auto durableCatalog = storageEngine->getCatalog();
+
+ // Create config.system.indexBuilds collection to store commit quorum value during index
+ // building.
+ ASSERT_OK(repl::StorageInterface::get(_opCtx)->dropCollection(
+ _opCtx, NamespaceString::kIndexBuildEntryNamespace));
+ ASSERT_OK(
+ createCollection(_opCtx,
+ NamespaceString::kIndexBuildEntryNamespace.db().toString(),
+ BSON("create" << NamespaceString::kIndexBuildEntryNamespace.coll())));
+
+ NamespaceString nss("unittests.timestampMultiIndexBuilds");
+ reset(nss);
+
+ std::vector<std::string> origIdents;
+ {
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
+
+ const LogicalTime insertTimestamp = _clock->tickClusterTime(1);
+
+ WriteUnitOfWork wuow(_opCtx);
+ insertDocument(autoColl.getCollection(),
+ InsertStatement(BSON("_id" << 0 << "a" << 1 << "b" << 2 << "c" << 3),
+ insertTimestamp.asTimestamp(),
+ _presentTerm));
+ wuow.commit();
+ ASSERT_EQ(1, itCount(autoColl.getCollection()));
+
+ // Save the pre-state idents so we can capture the specific ident related to index
+ // creation.
+ origIdents = durableCatalog->getAllIdents(_opCtx);
+
+ // Ensure we have a committed snapshot to avoid ReadConcernMajorityNotAvailableYet
+        // error at the beginning of the collection scan phase.
+ auto snapshotManager = storageEngine->getSnapshotManager();
+ snapshotManager->setCommittedSnapshot(insertTimestamp.asTimestamp());
+ }
+
+ DBDirectClient client(_opCtx);
+ {
+ // Disable index build commit quorum as we don't have support of replication subsystem
+ // for voting.
+ auto index1 = BSON("v" << kIndexVersion << "key" << BSON("a" << 1) << "name"
+ << "a_1");
+ auto index2 = BSON("v" << kIndexVersion << "key" << BSON("b" << 1) << "name"
+ << "b_1");
+ auto createIndexesCmdObj =
+ BSON("createIndexes" << nss.coll() << "indexes" << BSON_ARRAY(index1 << index2)
+ << "commitQuorum" << 0);
+ BSONObj result;
+ ASSERT(client.runCommand(nss.db().toString(), createIndexesCmdObj, result)) << result;
+ }
+
+ auto indexCreateInitTs = queryOplog(BSON("op"
+ << "c"
+ << "o.startIndexBuild" << nss.coll()
+ << "o.indexes.0.name"
+ << "a_1"))["ts"]
+ .timestamp();
+ auto commitIndexBuildTs = queryOplog(BSON("op"
+ << "c"
+ << "o.commitIndexBuild" << nss.coll()
+ << "o.indexes.0.name"
+ << "a_1"))["ts"]
+ .timestamp();
+ auto indexBComplete = commitIndexBuildTs;
+
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_S);
+ RecordId catalogId = autoColl.getCollection()->getCatalogId();
+
+ // The idents are created and persisted with the "ready: false" write.
+ // There should be two new index idents visible at this time.
+ const std::vector<std::string> indexes =
+ getNewIndexIdentsAtTime(durableCatalog, origIdents, indexCreateInitTs);
+ ASSERT_EQ(static_cast<std::size_t>(2), indexes.size()) << " Num idents: " << indexes.size();
+
+ ASSERT_FALSE(
+ getIndexMetaData(getMetaDataAtTime(durableCatalog, catalogId, indexCreateInitTs), "a_1")
+ .ready);
+ ASSERT_FALSE(
+ getIndexMetaData(getMetaDataAtTime(durableCatalog, catalogId, indexCreateInitTs), "b_1")
+ .ready);
+
+    // Assert the `a_1` and `b_1` indexes become ready at the last oplog entry time.
+ ASSERT_TRUE(
+ getIndexMetaData(getMetaDataAtTime(durableCatalog, catalogId, indexBComplete), "a_1")
+ .ready);
+ ASSERT_TRUE(
+ getIndexMetaData(getMetaDataAtTime(durableCatalog, catalogId, indexBComplete), "b_1")
+ .ready);
+
+ // Assert that the index build is removed from config.system.indexBuilds collection after
+ // completion.
+ {
+ AutoGetCollectionForRead collection(_opCtx, NamespaceString::kIndexBuildEntryNamespace);
+ ASSERT_TRUE(collection);
+
+        // At the commitIndexBuild entry time, the index build will still be present in the
+        // indexBuilds collection.
+ {
+ OneOffRead oor(_opCtx, indexBComplete);
+ // Fails if the collection is empty.
+ findOne(collection.getCollection());
+ }
+
+ // After the index build has finished, we should not see the doc in the indexBuilds
+ // collection.
+ ASSERT_EQUALS(0, itCount(collection.getCollection()));
+ }
+}
+
+/**
+ * Builds two secondary indexes (a_1, b_1), then renames the collection across databases with
+ * dropTarget=true. Asserts that the idents created via the rename's temporary collection, and
+ * the final "ready: true" index metadata, are visible at the timestamps of the corresponding
+ * oplog entries and not before.
+ */
+TEST_F(StorageTimestampTest, TimestampMultiIndexBuildsDuringRename) {
+    auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
+    auto durableCatalog = storageEngine->getCatalog();
+
+    NamespaceString nss("unittests.timestampMultiIndexBuildsDuringRename");
+    reset(nss);
+
+    {
+        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
+
+        const LogicalTime insertTimestamp = _clock->tickClusterTime(1);
+
+        WriteUnitOfWork wuow(_opCtx);
+        insertDocument(autoColl.getCollection(),
+                       InsertStatement(BSON("_id" << 0 << "a" << 1 << "b" << 2 << "c" << 3),
+                                       insertTimestamp.asTimestamp(),
+                                       _presentTerm));
+        wuow.commit();
+        ASSERT_EQ(1, itCount(autoColl.getCollection()));
+
+        // Ensure we have a committed snapshot to avoid ReadConcernMajorityNotAvailableYet
+        // error at the beginning of the collection scan phase.
+        auto snapshotManager = storageEngine->getSnapshotManager();
+        snapshotManager->setCommittedSnapshot(insertTimestamp.asTimestamp());
+    }
+
+    DBDirectClient client(_opCtx);
+    {
+        // Disable index build commit quorum as we don't have support of replication subsystem
+        // for voting.
+        auto index1 = BSON("v" << kIndexVersion << "key" << BSON("a" << 1) << "name"
+                               << "a_1");
+        auto index2 = BSON("v" << kIndexVersion << "key" << BSON("b" << 1) << "name"
+                               << "b_1");
+        auto createIndexesCmdObj =
+            BSON("createIndexes" << nss.coll() << "indexes" << BSON_ARRAY(index1 << index2)
+                                 << "commitQuorum" << 0);
+        BSONObj result;
+        ASSERT(client.runCommand(nss.db().toString(), createIndexesCmdObj, result)) << result;
+    }
+
+    AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
+
+    NamespaceString renamedNss("unittestsRename.timestampMultiIndexBuildsDuringRename");
+    reset(renamedNss);
+
+    // Save the pre-state idents so we can capture the specific ident related to index
+    // creation.
+    std::vector<std::string> origIdents = durableCatalog->getAllIdents(_opCtx);
+
+    // Rename collection.
+    BSONObj renameResult;
+    ASSERT(client.runCommand(
+        "admin",
+        BSON("renameCollection" << nss.ns() << "to" << renamedNss.ns() << "dropTarget" << true),
+        renameResult))
+        << renameResult;
+
+    NamespaceString tmpName;
+    Timestamp indexCommitTs;
+    // Empty temporary collections generate createIndexes oplog entry even if the node
+    // supports 2 phase index build. The rename implementation copies data through a temporary
+    // collection, so the oplog is queried to recover its generated name and timestamps.
+    const auto createIndexesDocument =
+        queryOplog(BSON("ns" << renamedNss.db() + ".$cmd"
+                             << "o.createIndexes" << BSON("$exists" << true) << "o.name"
+                             << "b_1"));
+    const auto tmpCollName =
+        createIndexesDocument.getObjectField("o").getStringField("createIndexes");
+    tmpName = NamespaceString(renamedNss.db(), tmpCollName);
+    indexCommitTs = createIndexesDocument["ts"].timestamp();
+    const Timestamp indexCreateInitTs = queryOplog(BSON("op"
+                                                        << "c"
+                                                        << "o.create" << tmpName.coll()))["ts"]
+                                            .timestamp();
+
+
+    // We expect one new collection ident and one new index ident (the _id index) during this
+    // rename.
+    assertRenamedCollectionIdentsAtTimestamp(
+        durableCatalog, origIdents, /*expectedNewIndexIdents*/ 1, indexCreateInitTs);
+
+    // We expect one new collection ident and three new index idents (including the _id index)
+    // after this rename. The a_1 and b_1 index idents are created and persisted with the
+    // "ready: true" write.
+    assertRenamedCollectionIdentsAtTimestamp(
+        durableCatalog, origIdents, /*expectedNewIndexIdents*/ 3, indexCommitTs);
+
+    // Assert the 'a_1' and `b_1` indexes become ready at the last oplog entry time.
+    RecordId renamedCatalogId = CollectionCatalog::get(_opCtx)
+                                    ->lookupCollectionByNamespace(_opCtx, renamedNss)
+                                    ->getCatalogId();
+    ASSERT_TRUE(
+        getIndexMetaData(getMetaDataAtTime(durableCatalog, renamedCatalogId, indexCommitTs), "a_1")
+            .ready);
+    ASSERT_TRUE(
+        getIndexMetaData(getMetaDataAtTime(durableCatalog, renamedCatalogId, indexCommitTs), "b_1")
+            .ready);
+}
+
+/**
+ * This test asserts that the catalog updates that represent the beginning and end of an aborted
+ * index build are timestamped. The oplog should contain two entries, startIndexBuild and
+ * abortIndexBuild. We will inspect the catalog at the timestamp corresponding to each of these
+ * oplog entries.
+ */
+TEST_F(StorageTimestampTest, TimestampAbortIndexBuild) {
+    auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
+    auto durableCatalog = storageEngine->getCatalog();
+
+    // Create config.system.indexBuilds collection to store commit quorum value during index
+    // building.
+    ASSERT_OK(repl::StorageInterface::get(_opCtx)->dropCollection(
+        _opCtx, NamespaceString::kIndexBuildEntryNamespace));
+    ASSERT_OK(
+        createCollection(_opCtx,
+                         NamespaceString::kIndexBuildEntryNamespace.db().toString(),
+                         BSON("create" << NamespaceString::kIndexBuildEntryNamespace.coll())));
+
+    NamespaceString nss("unittests.timestampAbortIndexBuild");
+    reset(nss);
+
+    std::vector<std::string> origIdents;
+    {
+        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
+
+        auto insertTimestamp1 = _clock->tickClusterTime(1);
+        auto insertTimestamp2 = _clock->tickClusterTime(1);
+
+        // Insert two documents with the same value for field 'a' so that
+        // we will fail to create a unique index.
+        WriteUnitOfWork wuow(_opCtx);
+        insertDocument(autoColl.getCollection(),
+                       InsertStatement(BSON("_id" << 0 << "a" << 1),
+                                       insertTimestamp1.asTimestamp(),
+                                       _presentTerm));
+        insertDocument(autoColl.getCollection(),
+                       InsertStatement(BSON("_id" << 1 << "a" << 1),
+                                       insertTimestamp2.asTimestamp(),
+                                       _presentTerm));
+        wuow.commit();
+        ASSERT_EQ(2, itCount(autoColl.getCollection()));
+
+        // Save the pre-state idents so we can capture the specific ident related to index
+        // creation.
+        origIdents = durableCatalog->getAllIdents(_opCtx);
+
+        // Ensure we have a committed snapshot to avoid ReadConcernMajorityNotAvailableYet
+        // error at the beginning of the collection scan phase.
+        auto snapshotManager = storageEngine->getSnapshotManager();
+        snapshotManager->setCommittedSnapshot(insertTimestamp2.asTimestamp());
+    }
+
+    {
+        // Disable index build commit quorum as we don't have support of replication subsystem
+        // for voting.
+        auto index1 = BSON("v" << kIndexVersion << "key" << BSON("a" << 1) << "name"
+                               << "a_1"
+                               << "unique" << true);
+        auto createIndexesCmdObj =
+            BSON("createIndexes" << nss.coll() << "indexes" << BSON_ARRAY(index1) << "commitQuorum"
+                                 << 0);
+
+        DBDirectClient client(_opCtx);
+        BSONObj result;
+        // The unique index build must fail with DuplicateKey because of the two conflicting
+        // documents inserted above; this is what forces the abortIndexBuild path.
+        ASSERT_FALSE(client.runCommand(nss.db().toString(), createIndexesCmdObj, result));
+        ASSERT_EQUALS(ErrorCodes::DuplicateKey, getStatusFromCommandResult(result));
+    }
+
+    // Confirm that startIndexBuild and abortIndexBuild oplog entries have been written to the
+    // oplog.
+    auto indexStartDocument =
+        queryOplog(BSON("ns" << nss.db() + ".$cmd"
+                             << "o.startIndexBuild" << nss.coll() << "o.indexes.0.name"
+                             << "a_1"));
+    auto indexStartTs = indexStartDocument["ts"].timestamp();
+    auto indexAbortDocument =
+        queryOplog(BSON("ns" << nss.db() + ".$cmd"
+                             << "o.abortIndexBuild" << nss.coll() << "o.indexes.0.name"
+                             << "a_1"));
+    auto indexAbortTs = indexAbortDocument["ts"].timestamp();
+
+    // Check index state in catalog at oplog entry times for both startIndexBuild and
+    // abortIndexBuild.
+    AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
+    RecordId catalogId = autoColl.getCollection()->getCatalogId();
+
+    // We expect one new index ident during this index build.
+    assertRenamedCollectionIdentsAtTimestamp(
+        durableCatalog, origIdents, /*expectedNewIndexIdents*/ 1, indexStartTs);
+    ASSERT_FALSE(
+        getIndexMetaData(getMetaDataAtTime(durableCatalog, catalogId, indexStartTs), "a_1").ready);
+
+    // We expect all new idents to be removed after the index build has aborted.
+    assertRenamedCollectionIdentsAtTimestamp(
+        durableCatalog, origIdents, /*expectedNewIndexIdents*/ 0, indexAbortTs);
+    assertIndexMetaDataMissing(getMetaDataAtTime(durableCatalog, catalogId, indexAbortTs), "a_1");
+
+    // Assert that the index build is removed from config.system.indexBuilds collection after
+    // completion.
+    {
+        AutoGetCollectionForRead collection(_opCtx, NamespaceString::kIndexBuildEntryNamespace);
+        ASSERT_TRUE(collection);
+
+        // At the abortIndexBuild entry time, the index build should still be present in the
+        // indexBuilds collection.
+        {
+            OneOffRead oor(_opCtx, indexAbortTs);
+            // Fails if the collection is empty.
+            findOne(collection.getCollection());
+        }
+
+        // After the index build has finished, we should not see the doc in the indexBuilds
+        // collection.
+        ASSERT_EQUALS(0, itCount(collection.getCollection()));
+    }
+}
+
+/**
+ * Builds three indexes one at a time, then drops all of them with the "*" wildcard. Asserts each
+ * index ident is visible at its creation timestamp (and not before), and that each drop is
+ * timestamped individually.
+ */
+TEST_F(StorageTimestampTest, TimestampIndexDropsWildcard) {
+    auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
+    auto durableCatalog = storageEngine->getCatalog();
+
+    NamespaceString nss("unittests.timestampIndexDrops");
+    reset(nss);
+
+    AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
+    CollectionWriter coll(_opCtx, autoColl);
+
+    const LogicalTime insertTimestamp = _clock->tickClusterTime(1);
+    {
+        WriteUnitOfWork wuow(_opCtx);
+        insertDocument(autoColl.getCollection(),
+                       InsertStatement(BSON("_id" << 0 << "a" << 1 << "b" << 2 << "c" << 3),
+                                       insertTimestamp.asTimestamp(),
+                                       _presentTerm));
+        wuow.commit();
+        ASSERT_EQ(1, itCount(autoColl.getCollection()));
+    }
+
+
+    const Timestamp beforeIndexBuild = _clock->tickClusterTime(1).asTimestamp();
+
+    // Save the pre-state idents so we can capture the specific ident related to index
+    // creation.
+    std::vector<std::string> origIdents = durableCatalog->getAllIdents(_opCtx);
+
+    std::vector<Timestamp> afterCreateTimestamps;
+    std::vector<std::string> indexIdents;
+    // Create an index and get the ident for each index.
+    for (auto key : {"a", "b", "c"}) {
+        createIndex(coll, str::stream() << key << "_1", BSON(key << 1));
+
+        // Timestamps at the completion of each index build.
+        afterCreateTimestamps.push_back(_clock->tickClusterTime(1).asTimestamp());
+
+        // Add the new ident to the vector and reset the current idents.
+        indexIdents.push_back(getNewIndexIdentAtTime(durableCatalog, origIdents, Timestamp::min()));
+        origIdents = durableCatalog->getAllIdents(_opCtx);
+    }
+
+    // Ensure each index is visible at the correct timestamp, and not before.
+    for (size_t i = 0; i < indexIdents.size(); i++) {
+        auto beforeTs = (i == 0) ? beforeIndexBuild : afterCreateTimestamps[i - 1];
+        assertIdentsMissingAtTimestamp(durableCatalog, "", indexIdents[i], beforeTs);
+        assertIdentsExistAtTimestamp(durableCatalog, "", indexIdents[i], afterCreateTimestamps[i]);
+    }
+
+    const auto currentTime = _clock->getTime();
+    const LogicalTime beforeDropTs = currentTime.clusterTime();
+
+    // Drop all of the indexes.
+    dropIndexes(_opCtx, nss, boost::none, "*");
+
+    // Assert that each index is dropped individually and with its own timestamp. The order of
+    // dropping and creating are not guaranteed to be the same, but assert all of the created
+    // indexes were also dropped.
+    size_t nIdents = indexIdents.size();
+    for (size_t i = 0; i < nIdents; i++) {
+        OneOffRead oor(_opCtx, beforeDropTs.addTicks(i + 1).asTimestamp());
+
+        auto ident = getDroppedIndexIdent(durableCatalog, origIdents);
+        // NOTE(review): single-iterator erase removes exactly one element; this relies on
+        // 'ident' being present in 'indexIdents' — std::remove would otherwise return end(),
+        // making the erase invalid. The final ASSERT_EQ below backs this assumption.
+        indexIdents.erase(std::remove(indexIdents.begin(), indexIdents.end(), ident));
+
+        origIdents = durableCatalog->getAllIdents(_opCtx);
+    }
+    ASSERT_EQ(indexIdents.size(), 0ul) << "Dropped idents should match created idents";
+}
+
+/**
+ * Same scenario as TimestampIndexDropsWildcard, but the drop names the indexes explicitly
+ * ({"a_1", "b_1", "c_1"}) rather than using the "*" wildcard.
+ */
+TEST_F(StorageTimestampTest, TimestampIndexDropsListed) {
+    auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
+    auto durableCatalog = storageEngine->getCatalog();
+
+    NamespaceString nss("unittests.timestampIndexDrops");
+    reset(nss);
+
+    AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
+    CollectionWriter coll(_opCtx, autoColl);
+
+    const LogicalTime insertTimestamp = _clock->tickClusterTime(1);
+    {
+        WriteUnitOfWork wuow(_opCtx);
+        insertDocument(autoColl.getCollection(),
+                       InsertStatement(BSON("_id" << 0 << "a" << 1 << "b" << 2 << "c" << 3),
+                                       insertTimestamp.asTimestamp(),
+                                       _presentTerm));
+        wuow.commit();
+        ASSERT_EQ(1, itCount(autoColl.getCollection()));
+    }
+
+
+    const Timestamp beforeIndexBuild = _clock->tickClusterTime(1).asTimestamp();
+
+    // Save the pre-state idents so we can capture the specific ident related to index
+    // creation.
+    std::vector<std::string> origIdents = durableCatalog->getAllIdents(_opCtx);
+
+    std::vector<Timestamp> afterCreateTimestamps;
+    std::vector<std::string> indexIdents;
+    // Create an index and get the ident for each index.
+    for (auto key : {"a", "b", "c"}) {
+        createIndex(coll, str::stream() << key << "_1", BSON(key << 1));
+
+        // Timestamps at the completion of each index build.
+        afterCreateTimestamps.push_back(_clock->tickClusterTime(1).asTimestamp());
+
+        // Add the new ident to the vector and reset the current idents.
+        indexIdents.push_back(getNewIndexIdentAtTime(durableCatalog, origIdents, Timestamp::min()));
+        origIdents = durableCatalog->getAllIdents(_opCtx);
+    }
+
+    // Ensure each index is visible at the correct timestamp, and not before.
+    for (size_t i = 0; i < indexIdents.size(); i++) {
+        auto beforeTs = (i == 0) ? beforeIndexBuild : afterCreateTimestamps[i - 1];
+        assertIdentsMissingAtTimestamp(durableCatalog, "", indexIdents[i], beforeTs);
+        assertIdentsExistAtTimestamp(durableCatalog, "", indexIdents[i], afterCreateTimestamps[i]);
+    }
+
+    const auto currentTime = _clock->getTime();
+    const LogicalTime beforeDropTs = currentTime.clusterTime();
+
+    // Drop all of the indexes.
+    dropIndexes(_opCtx, nss, boost::none, std::vector<std::string>{"a_1", "b_1", "c_1"});
+
+    // Assert that each index is dropped individually and with its own timestamp. The order of
+    // dropping and creating are not guaranteed to be the same, but assert all of the created
+    // indexes were also dropped.
+    size_t nIdents = indexIdents.size();
+    for (size_t i = 0; i < nIdents; i++) {
+        OneOffRead oor(_opCtx, beforeDropTs.addTicks(i + 1).asTimestamp());
+
+        auto ident = getDroppedIndexIdent(durableCatalog, origIdents);
+        // NOTE(review): single-iterator erase removes exactly one element; this relies on
+        // 'ident' being present in 'indexIdents' — std::remove would otherwise return end(),
+        // making the erase invalid. The final ASSERT_EQ below backs this assumption.
+        indexIdents.erase(std::remove(indexIdents.begin(), indexIdents.end(), ident));
+
+        origIdents = durableCatalog->getAllIdents(_opCtx);
+    }
+    ASSERT_EQ(indexIdents.size(), 0ul) << "Dropped idents should match created idents";
+}
+
+/**
+ * Test specific OplogApplierImpl subclass that allows for custom applyOplogBatchPerWorker to be run
+ * during multiApply. It coordinates with a reader thread in the test through a promise/future
+ * pair so the reader can attempt a collection lock while the batch is mid-application.
+ */
+class SecondaryReadsDuringBatchApplicationAreAllowedApplier : public repl::OplogApplierImpl {
+public:
+    // All OplogApplierImpl constructor arguments are forwarded unchanged; the three trailing
+    // arguments are test-coordination handles (see member comments below).
+    SecondaryReadsDuringBatchApplicationAreAllowedApplier(
+        executor::TaskExecutor* executor,
+        repl::OplogBuffer* oplogBuffer,
+        Observer* observer,
+        repl::ReplicationCoordinator* replCoord,
+        repl::ReplicationConsistencyMarkers* consistencyMarkers,
+        repl::StorageInterface* storageInterface,
+        const OplogApplier::Options& options,
+        ThreadPool* writerPool,
+        OperationContext* opCtx,
+        Promise<bool>* promise,
+        stdx::future<bool>* taskFuture)
+        : repl::OplogApplierImpl(executor,
+                                 oplogBuffer,
+                                 observer,
+                                 replCoord,
+                                 consistencyMarkers,
+                                 storageInterface,
+                                 options,
+                                 writerPool),
+          _testOpCtx(opCtx),
+          _promise(promise),
+          _taskFuture(taskFuture) {}
+
+    // Overridden to verify PBWM locking and rendezvous with the test's reader thread.
+    Status applyOplogBatchPerWorker(OperationContext* opCtx,
+                                    std::vector<const repl::OplogEntry*>* operationsToApply,
+                                    WorkerMultikeyPathInfo* pathInfo,
+                                    bool isDataConsistent) override;
+
+private:
+    // Pointer to the test's op context. This is distinct from the op context used in
+    // applyOplogBatchPerWorker.
+    OperationContext* _testOpCtx;
+    // Fulfilled (with true) once the batch insert has been applied, signalling the reader.
+    Promise<bool>* _promise;
+    // Resolved by the reader thread; false means the reader observed a PBWM lock it should not.
+    stdx::future<bool>* _taskFuture;
+};
+
+
+// This apply operation function will block until the reader has tried acquiring a collection lock.
+// This returns BadValue statuses instead of asserting so that the worker threads can cleanly exit
+// and this test case fails without crashing the entire suite.
+Status SecondaryReadsDuringBatchApplicationAreAllowedApplier::applyOplogBatchPerWorker(
+    OperationContext* opCtx,
+    std::vector<const repl::OplogEntry*>* operationsToApply,
+    WorkerMultikeyPathInfo* pathInfo,
+    const bool isDataConsistent) {
+    // The test's own op context must be holding the PBWM lock in MODE_X for the batch to apply.
+    if (!_testOpCtx->lockState()->isLockHeldForMode(resourceIdParallelBatchWriterMode, MODE_X)) {
+        return {ErrorCodes::BadValue, "Batch applied was not holding PBWM lock in MODE_X"};
+    }
+
+    // Insert the document. A reader without a PBWM lock should not see it yet.
+    // NOTE(review): the 'isDataConsistent' parameter is ignored here; a hard-coded 'true' is
+    // passed down to the base implementation instead.
+    const bool dataIsConsistent = true;
+    auto status = OplogApplierImpl::applyOplogBatchPerWorker(
+        opCtx, operationsToApply, pathInfo, dataIsConsistent);
+    if (!status.isOK()) {
+        return status;
+    }
+
+    // Signals the reader to acquire a collection read lock.
+    _promise->emplaceValue(true);
+
+    // Block while holding the PBWM lock until the reader is done.
+    if (!_taskFuture->get()) {
+        return {ErrorCodes::BadValue, "Client was holding PBWM lock in MODE_IS"};
+    }
+    return Status::OK();
+}
+
+/**
+ * Simulates a two-phase index build started while a secondary — where indexing errors are
+ * ignored and recorded as skipped records — followed by a transition to primary, where those
+ * skipped records must be resolved (documents fixed or deleted) before the build can commit.
+ */
+TEST_F(StorageTimestampTest, IndexBuildsResolveErrorsDuringStateChangeToPrimary) {
+    NamespaceString nss("unittests.timestampIndexBuilds");
+    reset(nss);
+
+    AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
+    CollectionWriter collection(_opCtx, autoColl);
+
+    // Indexing of parallel arrays is not allowed, so these are deemed "bad".
+    const auto badDoc1 = BSON("_id" << 0 << "a" << BSON_ARRAY(0 << 1) << "b" << BSON_ARRAY(0 << 1));
+    const auto badDoc2 = BSON("_id" << 1 << "a" << BSON_ARRAY(2 << 3) << "b" << BSON_ARRAY(2 << 3));
+    const auto badDoc3 = BSON("_id" << 2 << "a" << BSON_ARRAY(4 << 5) << "b" << BSON_ARRAY(4 << 5));
+
+    // NOTE: This test does not test any timestamp reads.
+    const LogicalTime insert1 = _clock->tickClusterTime(1);
+    {
+        LOGV2(22505, "inserting {badDoc1}", "badDoc1"_attr = badDoc1);
+        WriteUnitOfWork wuow(_opCtx);
+        insertDocument(collection.get(),
+                       InsertStatement(badDoc1, insert1.asTimestamp(), _presentTerm));
+        wuow.commit();
+    }
+
+    const LogicalTime insert2 = _clock->tickClusterTime(1);
+    {
+        LOGV2(22506, "inserting {badDoc2}", "badDoc2"_attr = badDoc2);
+        WriteUnitOfWork wuow(_opCtx);
+        insertDocument(collection.get(),
+                       InsertStatement(badDoc2, insert2.asTimestamp(), _presentTerm));
+        wuow.commit();
+    }
+
+    const IndexCatalogEntry* buildingIndex = nullptr;
+    MultiIndexBlock indexer;
+    ScopeGuard abortOnExit(
+        [&] { indexer.abortIndexBuild(_opCtx, collection, MultiIndexBlock::kNoopOnCleanUpFn); });
+
+    // Provide a build UUID, indicating that this is a two-phase index build.
+    const auto buildUUID = UUID::gen();
+    indexer.setTwoPhaseBuildUUID(buildUUID);
+
+    const LogicalTime indexInit = _clock->tickClusterTime(3);
+
+    // First, simulate being a secondary. Indexing errors are ignored.
+    {
+        ASSERT_OK(_coordinatorMock->setFollowerMode({repl::MemberState::MS::RS_SECONDARY}));
+        _coordinatorMock->alwaysAllowWrites(false);
+        repl::UnreplicatedWritesBlock unreplicatedWrites(_opCtx);
+
+        {
+            TimestampBlock tsBlock(_opCtx, indexInit.asTimestamp());
+
+            auto swSpecs = indexer.init(
+                _opCtx,
+                collection,
+                {BSON("v" << 2 << "name"
+                          << "a_1_b_1"
+                          << "ns" << collection->ns().ns() << "key" << BSON("a" << 1 << "b" << 1))},
+                MultiIndexBlock::makeTimestampedIndexOnInitFn(_opCtx, collection.get()));
+            ASSERT_OK(swSpecs.getStatus());
+        }
+
+        auto indexCatalog = collection->getIndexCatalog();
+        buildingIndex = indexCatalog->getEntry(
+            indexCatalog->findIndexByName(_opCtx, "a_1_b_1", /* includeUnfinished */ true));
+        ASSERT(buildingIndex);
+
+        ASSERT_OK(indexer.insertAllDocumentsInCollection(_opCtx, collection.get()));
+
+        ASSERT_TRUE(buildingIndex->indexBuildInterceptor()->areAllWritesApplied(_opCtx));
+
+        // There should be one skipped record from the collection scan.
+        ASSERT_FALSE(
+            buildingIndex->indexBuildInterceptor()->getSkippedRecordTracker()->areAllRecordsApplied(
+                _opCtx));
+    }
+
+    // As a primary, stop ignoring indexing errors.
+    ASSERT_OK(_coordinatorMock->setFollowerMode({repl::MemberState::MS::RS_PRIMARY}));
+
+    {
+        // This write will not succeed because the node is a primary and the document is not
+        // indexable.
+        LOGV2(22507, "attempting to insert {badDoc3}", "badDoc3"_attr = badDoc3);
+        WriteUnitOfWork wuow(_opCtx);
+        ASSERT_THROWS_CODE(
+            collection->insertDocument(
+                _opCtx,
+                InsertStatement(badDoc3, indexInit.addTicks(1).asTimestamp(), _presentTerm),
+                /* opDebug */ nullptr,
+                /* noWarn */ false),
+            DBException,
+            ErrorCodes::CannotIndexParallelArrays);
+        wuow.commit();
+    }
+
+    // There should be skipped records from failed collection scans and writes.
+    ASSERT_FALSE(
+        buildingIndex->indexBuildInterceptor()->getSkippedRecordTracker()->areAllRecordsApplied(
+            _opCtx));
+    // This fails because the bad record is still invalid.
+    auto status = indexer.retrySkippedRecords(_opCtx, collection.get());
+    ASSERT_EQ(status.code(), ErrorCodes::CannotIndexParallelArrays);
+
+    ASSERT_FALSE(
+        buildingIndex->indexBuildInterceptor()->getSkippedRecordTracker()->areAllRecordsApplied(
+            _opCtx));
+    ASSERT_TRUE(buildingIndex->indexBuildInterceptor()->areAllWritesApplied(_opCtx));
+
+    // Update one document to be valid, and delete the other. These modifications are written
+    // to the side writes table and must be drained.
+    Helpers::upsert(_opCtx, collection->ns().ns(), BSON("_id" << 0 << "a" << 1 << "b" << 1));
+    {
+        RecordId badRecord =
+            Helpers::findOne(_opCtx, collection.get(), BSON("_id" << 1), false /* requireIndex */);
+        WriteUnitOfWork wuow(_opCtx);
+        collection->deleteDocument(_opCtx, kUninitializedStmtId, badRecord, nullptr);
+        wuow.commit();
+    }
+
+    ASSERT_FALSE(buildingIndex->indexBuildInterceptor()->areAllWritesApplied(_opCtx));
+    ASSERT_OK(indexer.drainBackgroundWrites(_opCtx,
+                                            RecoveryUnit::ReadSource::kNoTimestamp,
+                                            IndexBuildInterceptor::DrainYieldPolicy::kNoYield));
+
+
+    // This succeeds because the bad documents are now either valid or removed.
+    ASSERT_OK(indexer.retrySkippedRecords(_opCtx, collection.get()));
+    ASSERT_TRUE(
+        buildingIndex->indexBuildInterceptor()->getSkippedRecordTracker()->areAllRecordsApplied(
+            _opCtx));
+    ASSERT_TRUE(buildingIndex->indexBuildInterceptor()->areAllWritesApplied(_opCtx));
+    ASSERT_OK(indexer.checkConstraints(_opCtx, collection.get()));
+
+    {
+        WriteUnitOfWork wuow(_opCtx);
+        ASSERT_OK(
+            indexer.commit(_opCtx,
+                           collection.getWritableCollection(),
+                           [&](const BSONObj& indexSpec) {
+                               _opCtx->getServiceContext()->getOpObserver()->onCreateIndex(
+                                   _opCtx, collection->ns(), collection->uuid(), indexSpec, false);
+                           },
+                           MultiIndexBlock::kNoopOnCommitFn));
+        wuow.commit();
+    }
+    abortOnExit.dismiss();
+}
+
+/**
+ * Uses SecondaryReadsDuringBatchApplicationAreAllowedApplier (above) to apply a single-insert
+ * batch while a separate reader thread acquires a collection read lock; verifies the reader is
+ * not blocked on (and does not hold) the PBWM lock, and that the insert is visible afterwards.
+ */
+TEST_F(StorageTimestampTest, SecondaryReadsDuringBatchApplicationAreAllowed) {
+    ASSERT(_opCtx->getServiceContext()->getStorageEngine()->supportsReadConcernSnapshot());
+
+    NamespaceString ns("unittest.secondaryReadsDuringBatchApplicationAreAllowed");
+    reset(ns);
+    UUID uuid = UUID::gen();
+    {
+        AutoGetCollectionForRead autoColl(_opCtx, ns);
+        // Overwrites the placeholder UUID generated above with the collection's actual UUID.
+        uuid = autoColl.getCollection()->uuid();
+        ASSERT_EQ(itCount(autoColl.getCollection()), 0);
+    }
+
+    // Returns true when the batch has started, meaning the applier is holding the PBWM lock.
+    // Will return false if the lock was not held.
+    auto batchInProgress = makePromiseFuture<bool>();
+    // Attempt to read when in the middle of a batch.
+    stdx::packaged_task<bool()> task([&] {
+        Client::initThread(getThreadName());
+        auto readOp = cc().makeOperationContext();
+
+        // Wait for the batch to start or fail.
+        if (!batchInProgress.future.get()) {
+            return false;
+        }
+        AutoGetCollectionForRead autoColl(readOp.get(), ns);
+        return !readOp->lockState()->isLockHeldForMode(resourceIdParallelBatchWriterMode, MODE_IS);
+    });
+    auto taskFuture = task.get_future();
+    stdx::thread taskThread{std::move(task)};
+
+    // If the test fails before the batch signals the reader, unblock and join the reader so the
+    // suite does not hang.
+    ScopeGuard joinGuard([&] {
+        batchInProgress.promise.emplaceValue(false);
+        taskThread.join();
+    });
+
+    // Make a simple insert operation.
+    BSONObj doc0 = BSON("_id" << 0 << "a" << 0);
+    auto insertOp = repl::OplogEntry(BSON("ts" << _futureTs << "t" << 1LL << "v" << 2 << "op"
+                                               << "i"
+                                               << "ns" << ns.ns() << "ui" << uuid << "wall"
+                                               << Date_t() << "o" << doc0));
+    DoNothingOplogApplierObserver observer;
+    // Apply the operation.
+    auto storageInterface = repl::StorageInterface::get(_opCtx);
+    auto writerPool = repl::makeReplWriterPool(1);
+    SecondaryReadsDuringBatchApplicationAreAllowedApplier oplogApplier(
+        nullptr,  // task executor. not required for multiApply().
+        nullptr,  // oplog buffer. not required for multiApply().
+        &observer,
+        _coordinatorMock,
+        _consistencyMarkers,
+        storageInterface,
+        repl::OplogApplier::Options(repl::OplogApplication::Mode::kSecondary),
+        writerPool.get(),
+        _opCtx,
+        &(batchInProgress.promise),
+        &taskFuture);
+    auto lastOpTime = unittest::assertGet(oplogApplier.applyOplogBatch(_opCtx, {insertOp}));
+    ASSERT_EQ(insertOp.getOpTime(), lastOpTime);
+
+    joinGuard.dismiss();
+    taskThread.join();
+
+    // Read on the local snapshot to verify the document was inserted.
+    AutoGetCollectionForRead autoColl(_opCtx, ns);
+    assertDocumentAtTimestamp(autoColl.getCollection(), _futureTs, doc0);
+}
+
+/**
+ * This test exercises the code path in which a primary performs an index build via oplog
+ * application of a createIndexes oplog entry. In this code path, a primary timestamps the
+ * index build through applying the oplog entry, rather than creating an oplog entry.
+ */
+TEST_F(StorageTimestampTest, TimestampIndexOplogApplicationOnPrimary) {
+    // Index builds expect a non-empty oplog and a valid committed snapshot.
+    {
+        Lock::GlobalLock lk(_opCtx, MODE_IX);
+        WriteUnitOfWork wuow(_opCtx);
+        auto service = _opCtx->getServiceContext();
+        service->getOpObserver()->onOpMessage(_opCtx, BSONObj());
+        wuow.commit();
+
+        auto snapshotManager = service->getStorageEngine()->getSnapshotManager();
+        auto lastAppliedOpTime =
+            repl::ReplicationCoordinator::get(service)->getMyLastAppliedOpTime();
+        snapshotManager->setCommittedSnapshot(lastAppliedOpTime.getTimestamp());
+    }
+
+    // In order for oplog application to assign timestamps, we must be in non-replicated mode
+    // and disable document validation.
+    repl::UnreplicatedWritesBlock uwb(_opCtx);
+    DisableDocumentValidation validationDisabler(_opCtx);
+
+    std::string dbName = "unittest";
+    NamespaceString nss(dbName, "oplogApplicationOnPrimary");
+    BSONObj doc = BSON("_id" << 1 << "field" << 1);
+
+    const LogicalTime setupStart = _clock->tickClusterTime(1);
+
+    UUID collUUID = UUID::gen();
+    {
+        // Create the collection and insert a document.
+        reset(nss);
+        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+        // Overwrites the placeholder UUID generated above with the collection's actual UUID.
+        collUUID = autoColl.getCollection()->uuid();
+        WriteUnitOfWork wuow(_opCtx);
+        insertDocument(autoColl.getCollection(),
+                       InsertStatement(doc, setupStart.asTimestamp(), _presentTerm));
+        wuow.commit();
+    }
+
+
+    {
+        // Sanity check everything exists.
+        AutoGetCollectionForReadCommand coll(_opCtx, nss);
+        ASSERT(coll);
+
+        const auto currentTime = _clock->getTime();
+        const auto presentTs = currentTime.clusterTime().asTimestamp();
+        assertDocumentAtTimestamp(coll.getCollection(), presentTs, doc);
+    }
+
+    // Simulate a scenario where the node is a primary, but does not accept writes. This is
+    // the only scenario in which a primary can do an index build via oplog application, since
+    // the applyOps command no longer allows createIndexes (see SERVER-41554).
+    _coordinatorMock->alwaysAllowWrites(false);
+    {
+        const auto beforeBuildTime = _clock->tickClusterTime(2);
+        const auto startBuildTs = beforeBuildTime.addTicks(1).asTimestamp();
+
+        // Grab the existing idents to identify the ident created by the index build.
+        auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
+        auto durableCatalog = storageEngine->getCatalog();
+        std::vector<std::string> origIdents;
+        {
+            AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
+            origIdents = durableCatalog->getAllIdents(_opCtx);
+        }
+
+        auto keyPattern = BSON("field" << 1);
+        auto startBuildOpTime = repl::OpTime(startBuildTs, _presentTerm);
+        UUID indexBuildUUID = UUID::gen();
+
+        // Wait for the index build thread to start the collection scan before proceeding with
+        // checking the catalog and applying the commitIndexBuild oplog entry.
+        // There is a potential race between applying the commitIndexBuild oplog entry and the
+        // transition of the index build thread's ReplIndexBuildState from kSetup to
+        // kInProgress. This is due to the commit retry logic using the ClockSourceMock, rather
+        // than an actual system clock that advances automatically, through OperationContext's
+        // waitFor() function.
+        {
+            FailPointEnableBlock fpb("hangAfterStartingIndexBuild");
+
+            auto start = repl::makeStartIndexBuildOplogEntry(
+                startBuildOpTime, nss, "field_1", keyPattern, collUUID, indexBuildUUID);
+            const bool dataIsConsistent = true;
+            ASSERT_OK(repl::applyOplogEntryOrGroupedInserts(
+                _opCtx, &start, repl::OplogApplication::Mode::kSecondary, dataIsConsistent));
+
+            // We cannot use the OperationContext to wait for the thread to reach the fail point
+            // because it also uses the ClockSourceMock.
+            fpb->waitForTimesEntered(Interruptible::notInterruptible(),
+                                     fpb.initialTimesEntered() + 1);
+        }
+
+        {
+            AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
+            const std::string indexIdent =
+                getNewIndexIdentAtTime(durableCatalog, origIdents, Timestamp::min());
+            assertIdentsMissingAtTimestamp(
+                durableCatalog, "", indexIdent, beforeBuildTime.asTimestamp());
+            assertIdentsExistAtTimestamp(durableCatalog, "", indexIdent, startBuildTs);
+
+            // The index has not committed yet, so it is not ready.
+            RecordId catalogId = autoColl.getCollection()->getCatalogId();
+            ASSERT_FALSE(getIndexMetaData(
+                             getMetaDataAtTime(durableCatalog, catalogId, startBuildTs), "field_1")
+                             .ready);
+        }  // release read lock so commit index build oplog entry can take its own locks.
+
+        auto commit = repl::makeCommitIndexBuildOplogEntry(
+            startBuildOpTime, nss, "field_1", keyPattern, collUUID, indexBuildUUID);
+        const bool dataIsConsistent = true;
+        ASSERT_OK(repl::applyOplogEntryOrGroupedInserts(
+            _opCtx, &commit, repl::OplogApplication::Mode::kSecondary, dataIsConsistent));
+
+        // Reacquire read lock to check index metadata.
+        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
+        RecordId catalogId = autoColl.getCollection()->getCatalogId();
+        ASSERT_TRUE(
+            getIndexMetaData(getMetaDataAtTime(durableCatalog, catalogId, startBuildTs), "field_1")
+                .ready);
+    }
+}
+
+/**
+ * Creating a view writes two separate oplog entries: a 'create' for the system.views collection
+ * (if it did not exist) and an insert of the view definition into it. Asserts each write is
+ * visible at its own oplog timestamp and not before.
+ */
+TEST_F(StorageTimestampTest, ViewCreationSeparateTransaction) {
+    auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
+    auto durableCatalog = storageEngine->getCatalog();
+
+    const NamespaceString backingCollNss("unittests.backingColl");
+    reset(backingCollNss);
+
+    const NamespaceString viewNss("unittests.view");
+    const NamespaceString systemViewsNss("unittests.system.views");
+
+    ASSERT_OK(createCollection(_opCtx,
+                               viewNss.db().toString(),
+                               BSON("create" << viewNss.coll() << "pipeline" << BSONArray()
+                                             << "viewOn" << backingCollNss.coll())));
+
+    const Timestamp systemViewsCreateTs = queryOplog(BSON("op"
+                                                          << "c"
+                                                          << "ns" << (viewNss.db() + ".$cmd")
+                                                          << "o.create"
+                                                          << "system.views"))["ts"]
+                                              .timestamp();
+    const Timestamp viewCreateTs = queryOplog(BSON("op"
+                                                   << "i"
+                                                   << "ns" << systemViewsNss.ns() << "o._id"
+                                                   << viewNss.ns()))["ts"]
+                                       .timestamp();
+
+    {
+        Lock::GlobalRead read(_opCtx);
+        AutoGetCollection autoColl(_opCtx, systemViewsNss, LockMode::MODE_IS);
+        RecordId catalogId = autoColl.getCollection()->getCatalogId();
+
+        // One tick before the create timestamp, system.views must not exist in the catalog.
+        auto systemViewsMd = getMetaDataAtTime(
+            durableCatalog, catalogId, Timestamp(systemViewsCreateTs.asULL() - 1));
+        ASSERT(systemViewsMd == nullptr)
+            << systemViewsNss
+            << " incorrectly exists before creation. CreateTs: " << systemViewsCreateTs;
+
+        systemViewsMd = getMetaDataAtTime(durableCatalog, catalogId, systemViewsCreateTs);
+        TenantNamespace tenantNs = systemViewsMd->tenantNs;
+        ASSERT_EQ(systemViewsNss.ns(), tenantNs.getNss().ns());
+
+        // At creation time the collection is empty; the view document only appears at the
+        // later insert timestamp.
+        assertDocumentAtTimestamp(autoColl.getCollection(), systemViewsCreateTs, BSONObj());
+        assertDocumentAtTimestamp(autoColl.getCollection(),
+                                  viewCreateTs,
+                                  BSON("_id" << viewNss.ns() << "viewOn" << backingCollNss.coll()
+                                             << "pipeline" << BSONArray()));
+    }
+}
+
+// Verifies that creating 'admin.system.users' timestamps both the collection
+// creation and the implicit build of its system index "user_1_db_1": neither
+// the namespace nor the index ident may be visible in the durable catalog at
+// any timestamp before the corresponding oplog entry.
+TEST_F(StorageTimestampTest, CreateCollectionWithSystemIndex) {
+    NamespaceString nss("admin.system.users");
+
+    // Sanity check: the collection must not exist before the create below.
+    { ASSERT_FALSE(AutoGetCollectionForReadCommand(_opCtx, nss).getCollection()); }
+
+    ASSERT_OK(createCollection(_opCtx, nss.db().toString(), BSON("create" << nss.coll())));
+
+    RecordId catalogId;
+    {
+        AutoGetCollectionForReadCommand coll(_opCtx, nss);
+        ASSERT(coll.getCollection());
+        catalogId = coll.getCollection()->getCatalogId();
+    }
+
+    // Locate the 'create' command entry for this namespace in the oplog.
+    BSONObj result = queryOplog(BSON("op"
+                                     << "c"
+                                     << "ns" << nss.getCommandNS().ns() << "o.create"
+                                     << nss.coll()));
+    repl::OplogEntry op(result);
+    // The logOp() call for createCollection should have timestamp 'futureTs', which will also
+    // be the timestamp at which we do the write which creates the collection. Thus we expect
+    // the collection to appear at 'futureTs' and not before.
+    ASSERT_EQ(op.getTimestamp(), _futureTs) << op.toBSONForLogging();
+
+    // The index build emits three oplog entries.
+    Timestamp indexStartTs;
+    Timestamp indexCreateTs;
+    Timestamp indexCompleteTs;
+    // Empty collections generate createIndexes oplog entry even if the node
+    // supports 2 phase index build.
+    indexStartTs = op.getTimestamp();
+    indexCreateTs = repl::OplogEntry(queryOplog(BSON("op"
+                                                     << "c"
+                                                     << "ns" << nss.getCommandNS().ns()
+                                                     << "o.createIndexes" << nss.coll() << "o.name"
+                                                     << "user_1_db_1")))
+                        .getTimestamp();
+    indexCompleteTs = indexCreateTs;
+
+    // The namespace becomes visible exactly at '_futureTs' and stays visible.
+    assertNamespaceInIdents(nss, _pastTs, false);
+    assertNamespaceInIdents(nss, _presentTs, false);
+    assertNamespaceInIdents(nss, _futureTs, true);
+    assertNamespaceInIdents(nss, indexStartTs, true);
+    if (!indexCreateTs.isNull()) {
+        assertNamespaceInIdents(nss, indexCreateTs, true);
+    }
+    assertNamespaceInIdents(nss, indexCompleteTs, true);
+    assertNamespaceInIdents(nss, _nullTs, true);
+
+    ASSERT_GT(indexCompleteTs, _futureTs);
+    AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
+    auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
+    auto durableCatalog = storageEngine->getCatalog();
+    auto indexIdent = durableCatalog->getIndexIdent(_opCtx, catalogId, "user_1_db_1");
+    assertIdentsMissingAtTimestamp(durableCatalog, "", indexIdent, _pastTs);
+    assertIdentsMissingAtTimestamp(durableCatalog, "", indexIdent, _presentTs);
+    assertIdentsMissingAtTimestamp(durableCatalog, "", indexIdent, _futureTs);
+    // This is the timestamp of the startIndexBuild oplog entry, which is timestamped before the
+    // index is created as part of the createIndexes oplog entry.
+    assertIdentsMissingAtTimestamp(durableCatalog, "", indexIdent, indexStartTs);
+    if (!indexCreateTs.isNull()) {
+        assertIdentsExistAtTimestamp(durableCatalog, "", indexIdent, indexCreateTs);
+    }
+    assertIdentsExistAtTimestamp(durableCatalog, "", indexIdent, indexCompleteTs);
+    assertIdentsExistAtTimestamp(durableCatalog, "", indexIdent, _nullTs);
+}
+
+// Fixture for retryable findAndModify timestamp tests. It creates a fresh
+// collection, inserts 'oldObj' at 'insertTs', checks in a session, and begins
+// a retryable write so each test can perform one update/delete and then
+// inspect oplog visibility at the timestamps computed below.
+class RetryableFindAndModifyTest : public StorageTimestampTest {
+public:
+    const StringData dbName = "unittest"_sd;
+    // The single document present at the start of every test.
+    const BSONObj oldObj = BSON("_id" << 0 << "a" << 1);
+
+    RetryableFindAndModifyTest() : nss(dbName, "retryableFindAndModifyTest") {
+        // Reset session state so this fixture owns the session catalog.
+        auto service = _opCtx->getServiceContext();
+        auto sessionCatalog = SessionCatalog::get(service);
+        sessionCatalog->reset_forTest();
+        MongoDSessionCatalog::onStepUp(_opCtx);
+
+        reset(nss);
+        UUID ui = UUID::gen();
+
+        {
+            AutoGetCollection coll(_opCtx, nss, LockMode::MODE_IX);
+            ASSERT(coll);
+            ui = coll->uuid();
+        }
+
+        // Lay out the expected timestamps relative to the current cluster
+        // time: the setup insert lands at 'insertTs'; the test's write is
+        // expected to reserve 'beforeOplogTs' and 'oplogTs'.
+        const auto currentTime = _clock->getTime();
+        currentTs = currentTime.clusterTime().asTimestamp();
+        insertTs = currentTime.clusterTime().asTimestamp() + 1;
+        beforeOplogTs = insertTs + 1;
+        oplogTs = insertTs + 2;
+        // This test does not run a real ReplicationCoordinator, so must advance the snapshot
+        // manager manually.
+        auto storageEngine = cc().getServiceContext()->getStorageEngine();
+        storageEngine->getSnapshotManager()->setLastApplied(insertTs);
+
+        const auto sessionId = makeLogicalSessionIdForTest();
+        _opCtx->setLogicalSessionId(sessionId);
+        const auto txnNumber = 10;
+        _opCtx->setTxnNumber(txnNumber);
+
+        // Check out the session for the duration of the test.
+        ocs.emplace(_opCtx);
+
+        {
+            AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+            WriteUnitOfWork wunit(_opCtx);
+            insertDocument(autoColl.getCollection(), InsertStatement(oldObj));
+            wunit.commit();
+        }
+        // Confirm the setup insert was timestamped at 'insertTs'.
+        assertOplogDocumentExistsAtTimestamp(BSON("ts" << insertTs << "op"
+                                                  << "i"),
+                                             insertTs,
+                                             true);
+
+        storageEngine->getSnapshotManager()->setLastApplied(insertTs);
+
+        auto txnParticipant = TransactionParticipant::get(_opCtx);
+        ASSERT(txnParticipant);
+        // Start a retryable write.
+        txnParticipant.beginOrContinue(
+            _opCtx, {txnNumber}, boost::none /* autocommit */, boost::none /* startTransaction */);
+    }
+
+protected:
+    NamespaceString nss;
+    // Cluster time when the fixture started (before the setup insert).
+    Timestamp currentTs;
+    // Timestamp of the setup insert of 'oldObj'.
+    Timestamp insertTs;
+    // insertTs + 1; tests assert no oplog entry exists here.
+    Timestamp beforeOplogTs;
+    // insertTs + 2; tests assert the write's oplog entry appears here.
+    Timestamp oplogTs;
+
+    boost::optional<MongoDOperationContextSession> ocs;
+};
+
+// With findAndModify images stored in the side collection, a retryable
+// update must leave no oplog entry at 'beforeOplogTs' (NOTE(review):
+// presumably the slot that a pre-image noop entry would have used — confirm)
+// and must write its 'u' entry exactly at 'oplogTs'.
+TEST_F(RetryableFindAndModifyTest, RetryableFindAndModifyUpdate) {
+    RAIIServerParameterControllerForTest ffRaii("featureFlagRetryableFindAndModify", true);
+    RAIIServerParameterControllerForTest storeImageInSideCollection(
+        "storeFindAndModifyImagesInSideCollection", true);
+    AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
+    CollectionWriter collection(_opCtx, autoColl);
+    const auto newObj = BSON("_id" << 0 << "a" << 1 << "b" << 1);
+    CollectionUpdateArgs args;
+    args.stmtIds = {1};
+    args.preImageDoc = oldObj;
+    args.updatedDoc = newObj;
+    args.storeDocOption = CollectionUpdateArgs::StoreDocOption::PreImage;
+    args.update = BSON("$set" << BSON("b" << 1));
+    args.criteria = BSON("_id" << 0);
+
+    {
+        // Update the single document in place within one WUOW.
+        auto cursor = collection->getCursor(_opCtx);
+        auto record = cursor->next();
+        invariant(record);
+        WriteUnitOfWork wuow(_opCtx);
+        collection->updateDocument(
+            _opCtx,
+            record->id,
+            Snapshotted<BSONObj>(_opCtx->recoveryUnit()->getSnapshotId(), oldObj),
+            newObj,
+            false,
+            nullptr,
+            &args);
+        wuow.commit();
+    }
+
+    // There should be no oplog entry at 'beforeOplogTs'.
+    const auto beforeOplogTsFilter = BSON("ts" << beforeOplogTs);
+    assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, currentTs, false);
+    assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, beforeOplogTs, false);
+    assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, oplogTs, false);
+
+    // The update's oplog entry is visible only at/after 'oplogTs'.
+    const auto oplogTsFilter = BSON("ts" << oplogTs << "op"
+                                         << "u");
+    assertOplogDocumentExistsAtTimestamp(oplogTsFilter, currentTs, false);
+    assertOplogDocumentExistsAtTimestamp(oplogTsFilter, beforeOplogTs, false);
+    assertOplogDocumentExistsAtTimestamp(oplogTsFilter, oplogTs, true);
+}
+
+// Same visibility contract as RetryableFindAndModifyUpdate, but the update is
+// applied via the in-place damage-vector path (updateDocumentWithDamages)
+// instead of a full-document replacement.
+TEST_F(RetryableFindAndModifyTest, RetryableFindAndModifyUpdateWithDamages) {
+    namespace mmb = mongo::mutablebson;
+    RAIIServerParameterControllerForTest ffRaii("featureFlagRetryableFindAndModify", true);
+    RAIIServerParameterControllerForTest storeImageInSideCollection(
+        "storeFindAndModifyImagesInSideCollection", true);
+    const auto bsonObj = BSON("_id" << 0 << "a" << 1);
+    // Create a new document representing BSONObj with the above contents.
+    mmb::Document doc(bsonObj, mmb::Document::kInPlaceEnabled);
+
+    // Build the damage vector describing the in-place byte edits.
+    mmb::DamageVector damages;
+    const char* source = nullptr;
+    size_t size = 0;
+    ASSERT_TRUE(doc.getInPlaceUpdates(&damages, &source, &size));
+
+    // Enable in-place mutation for this document
+    ASSERT_EQUALS(mmb::Document::kInPlaceEnabled, doc.getCurrentInPlaceMode());
+    AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
+    CollectionWriter collection(_opCtx, autoColl);
+    const auto newObj = BSON("_id" << 0 << "a" << 0);
+    CollectionUpdateArgs args;
+    args.stmtIds = {1};
+    args.preImageDoc = oldObj;
+    args.updatedDoc = newObj;
+    args.storeDocOption = CollectionUpdateArgs::StoreDocOption::PreImage;
+    args.update = BSON("$set" << BSON("a" << 0));
+    args.criteria = BSON("_id" << 0);
+
+    {
+        Snapshotted<BSONObj> objSnapshot(_opCtx->recoveryUnit()->getSnapshotId(), oldObj);
+        const RecordData oldRec(objSnapshot.value().objdata(), objSnapshot.value().objsize());
+        Snapshotted<RecordData> recordSnapshot(objSnapshot.snapshotId(), oldRec);
+        auto cursor = collection->getCursor(_opCtx);
+        auto record = cursor->next();
+        invariant(record);
+        WriteUnitOfWork wuow(_opCtx);
+        const auto statusWith = collection->updateDocumentWithDamages(
+            _opCtx, record->id, std::move(recordSnapshot), source, damages, &args);
+        wuow.commit();
+        ASSERT_OK(statusWith.getStatus());
+    }
+
+    // There should be no oplog entry at 'beforeOplogTs'.
+    const auto beforeOplogTsFilter = BSON("ts" << beforeOplogTs);
+    assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, currentTs, false);
+    assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, beforeOplogTs, false);
+    assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, oplogTs, false);
+
+    // The update's oplog entry is visible only at/after 'oplogTs'.
+    const auto tsFilter = BSON("ts" << oplogTs << "op"
+                                    << "u");
+    assertOplogDocumentExistsAtTimestamp(tsFilter, currentTs, false);
+    assertOplogDocumentExistsAtTimestamp(tsFilter, beforeOplogTs, false);
+    assertOplogDocumentExistsAtTimestamp(tsFilter, oplogTs, true);
+}
+
+// Retryable findAndModify delete variant: no oplog entry may exist at
+// 'beforeOplogTs', and the 'd' entry must appear exactly at 'oplogTs'.
+TEST_F(RetryableFindAndModifyTest, RetryableFindAndModifyDelete) {
+    RAIIServerParameterControllerForTest ffRaii("featureFlagRetryableFindAndModify", true);
+    RAIIServerParameterControllerForTest storeImageInSideCollection(
+        "storeFindAndModifyImagesInSideCollection", true);
+    AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
+    CollectionWriter collection(_opCtx, autoColl);
+    const auto bsonObj = BSON("_id" << 0 << "a" << 1);
+
+    {
+        // Delete the single document, requesting the deleted doc be stored.
+        Snapshotted<BSONObj> objSnapshot(_opCtx->recoveryUnit()->getSnapshotId(), oldObj);
+        auto cursor = collection->getCursor(_opCtx);
+        auto record = cursor->next();
+        invariant(record);
+        WriteUnitOfWork wuow(_opCtx);
+        collection->deleteDocument(_opCtx,
+                                   objSnapshot,
+                                   1,
+                                   record->id,
+                                   nullptr,
+                                   false,
+                                   false,
+                                   Collection::StoreDeletedDoc::On);
+        wuow.commit();
+    }
+
+    // There should be no oplog entry at 'beforeOplogTs'.
+    const auto beforeOplogTsFilter = BSON("ts" << beforeOplogTs);
+    assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, currentTs, false);
+    assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, beforeOplogTs, false);
+    assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, oplogTs, false);
+
+    // The delete's oplog entry is visible only at/after 'oplogTs'.
+    const auto tsFilter = BSON("ts" << oplogTs << "op"
+                                    << "d");
+    assertOplogDocumentExistsAtTimestamp(tsFilter, currentTs, false);
+    assertOplogDocumentExistsAtTimestamp(tsFilter, beforeOplogTs, false);
+    assertOplogDocumentExistsAtTimestamp(tsFilter, oplogTs, true);
+}
+
+// Fixture for multi-document transaction timestamp tests. It starts a
+// transaction, stages an insert of 'doc' inside it (stashed, uncommitted),
+// and verifies nothing is visible yet; each test then commits/prepares/aborts
+// and checks document and oplog visibility around 'commitEntryTs'.
+class MultiDocumentTransactionTest : public StorageTimestampTest {
+public:
+    const StringData dbName = "unittest"_sd;
+    // Document inserted inside the transaction during fixture setup.
+    const BSONObj doc = BSON("_id" << 1 << "TestValue" << 1);
+    const BSONObj docKey = BSON("_id" << 1);
+
+    MultiDocumentTransactionTest() : nss(dbName, "multiDocumentTransactionTest") {
+        // Reset session state so this fixture owns the session catalog.
+        auto service = _opCtx->getServiceContext();
+        auto sessionCatalog = SessionCatalog::get(service);
+        sessionCatalog->reset_forTest();
+        MongoDSessionCatalog::onStepUp(_opCtx);
+
+        reset(nss);
+        UUID ui = UUID::gen();
+        {
+            AutoGetCollection coll(_opCtx, nss, LockMode::MODE_IX);
+            ASSERT(coll);
+            ui = coll->uuid();
+        }
+
+        const auto currentTime = _clock->getTime();
+        presentTs = currentTime.clusterTime().asTimestamp();
+        // This test does not run a real ReplicationCoordinator, so must advance the snapshot
+        // manager manually.
+        auto storageEngine = cc().getServiceContext()->getStorageEngine();
+        storageEngine->getSnapshotManager()->setLastApplied(presentTs);
+        const auto beforeTxnTime = _clock->tickClusterTime(1);
+        beforeTxnTs = beforeTxnTime.asTimestamp();
+        commitEntryTs = beforeTxnTime.addTicks(1).asTimestamp();
+
+        const auto sessionId = makeLogicalSessionIdForTest();
+        _opCtx->setLogicalSessionId(sessionId);
+        _opCtx->setTxnNumber(26);
+        _opCtx->setInMultiDocumentTransaction();
+
+        // Check out the session for the duration of the test.
+        ocs.emplace(_opCtx);
+
+        auto txnParticipant = TransactionParticipant::get(_opCtx);
+        ASSERT(txnParticipant);
+
+        // Begin the transaction and stage the insert without committing.
+        txnParticipant.beginOrContinue(
+            _opCtx, {*_opCtx->getTxnNumber()}, false /* autocommit */, true /* startTransaction */);
+        txnParticipant.unstashTransactionResources(_opCtx, "insert");
+        {
+            AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+            insertDocument(autoColl.getCollection(), InsertStatement(doc));
+        }
+        txnParticipant.stashTransactionResources(_opCtx);
+
+        {
+            // The uncommitted insert must be invisible at every timestamp,
+            // and no commit oplog entry may exist yet.
+            AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
+            const auto& coll = autoColl.getCollection();
+            assertDocumentAtTimestamp(coll, presentTs, BSONObj());
+            assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
+            assertDocumentAtTimestamp(coll, commitEntryTs, BSONObj());
+            assertDocumentAtTimestamp(coll, _nullTs, BSONObj());
+
+            const auto commitFilter = BSON("ts" << commitEntryTs);
+            assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
+            assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
+            assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, false);
+            assertOplogDocumentExistsAtTimestamp(commitFilter, _nullTs, false);
+        }
+    }
+
+    void logTimestamps() const {
+        LOGV2(22508, "Present TS: {presentTs}", "presentTs"_attr = presentTs);
+        LOGV2(22509, "Before transaction TS: {beforeTxnTs}", "beforeTxnTs"_attr = beforeTxnTs);
+        LOGV2(22510, "Commit entry TS: {commitEntryTs}", "commitEntryTs"_attr = commitEntryTs);
+    }
+
+    // Reads this session's config.transactions document as of 'ts' and
+    // asserts its presence matches 'expected'; returns the document (empty if
+    // not found).
+    BSONObj getSessionTxnInfoAtTimestamp(const Timestamp& ts, bool expected) {
+        AutoGetCollection autoColl(
+            _opCtx, NamespaceString::kSessionTransactionsTableNamespace, LockMode::MODE_IX);
+        const auto sessionId = *_opCtx->getLogicalSessionId();
+        const auto txnNum = *_opCtx->getTxnNumber();
+        BSONObj doc;
+        // OneOffRead pins the read timestamp for this lookup only.
+        OneOffRead oor(_opCtx, ts);
+        bool found = Helpers::findOne(_opCtx,
+                                      autoColl.getCollection(),
+                                      BSON("_id" << sessionId.toBSON() << "txnNum" << txnNum),
+                                      doc);
+        if (expected) {
+            ASSERT(found) << "Missing session transaction info at " << ts;
+        } else {
+            ASSERT_FALSE(found) << "Session transaction info at " << ts
+                                << " is unexpectedly present " << doc;
+        }
+        return doc;
+    }
+
+protected:
+    NamespaceString nss;
+    // Cluster time before the transaction started.
+    Timestamp presentTs;
+    // presentTs + 1 tick; still before any transaction oplog entry.
+    Timestamp beforeTxnTs;
+    // Expected timestamp of the transaction's commit oplog entry.
+    Timestamp commitEntryTs;
+
+    boost::optional<MongoDOperationContextSession> ocs;
+};
+
+// Commits the fixture's unprepared transaction and verifies the inserted
+// document and the commit oplog entry become visible exactly at
+// 'commitEntryTs', and not at any earlier timestamp.
+TEST_F(MultiDocumentTransactionTest, MultiDocumentTransaction) {
+    auto txnParticipant = TransactionParticipant::get(_opCtx);
+    ASSERT(txnParticipant);
+    logTimestamps();
+
+    txnParticipant.unstashTransactionResources(_opCtx, "insert");
+
+    txnParticipant.commitUnpreparedTransaction(_opCtx);
+
+    txnParticipant.stashTransactionResources(_opCtx);
+    // A committed (non-prepared) transaction must leave no startOpTime.
+    assertNoStartOpTime();
+    {
+        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+        const auto& coll = autoColl.getCollection();
+        assertDocumentAtTimestamp(coll, presentTs, BSONObj());
+        assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
+        assertDocumentAtTimestamp(coll, commitEntryTs, doc);
+        assertDocumentAtTimestamp(coll, _nullTs, doc);
+
+        const auto commitFilter = BSON("ts" << commitEntryTs);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, true);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, _nullTs, true);
+    }
+}
+
+// RAII guard that forces every transaction operation into its own oplog
+// entry (instead of packing as many as fit into one), so tests can observe
+// the timestamps of multi-oplog-entry transactions. Restores the previous
+// packing limit on destruction.
+class MultiOplogScopedSettings {
+public:
+    MultiOplogScopedSettings() {
+        // Save the current limit, then force one operation per entry.
+        _savedLimit = gMaxNumberOfTransactionOperationsInSingleOplogEntry;
+        gMaxNumberOfTransactionOperationsInSingleOplogEntry = 1;
+    }
+    ~MultiOplogScopedSettings() {
+        gMaxNumberOfTransactionOperationsInSingleOplogEntry = _savedLimit;
+    }
+
+private:
+    int _savedLimit;
+};
+
+// With packing disabled (one operation per oplog entry), commits an
+// unprepared transaction holding two inserts and verifies: both documents
+// appear only at 'commitEntryTs'; the partialTxn applyOps entry exists at
+// 'firstOplogEntryTs' and the implicit-commit applyOps at 'commitEntryTs';
+// oldestActiveTxnTimestamp tracks the first entry while in progress; and the
+// config.transactions state goes inProgress -> committed.
+TEST_F(MultiDocumentTransactionTest, MultiOplogEntryTransaction) {
+    MultiOplogScopedSettings multiOplogSettings;
+
+    // Two oplog slots follow the current cluster time: the partialTxn entry,
+    // then the implicit commit entry.
+    const auto currentTime = _clock->getTime();
+    const auto clusterTime = currentTime.clusterTime();
+    Timestamp firstOplogEntryTs = clusterTime.addTicks(1).asTimestamp();
+    commitEntryTs = clusterTime.addTicks(2).asTimestamp();
+
+    auto txnParticipant = TransactionParticipant::get(_opCtx);
+    ASSERT(txnParticipant);
+    logTimestamps();
+
+    txnParticipant.unstashTransactionResources(_opCtx, "insert");
+
+    // Add a second insert so the transaction spans two oplog entries.
+    const BSONObj doc2 = BSON("_id" << 2 << "TestValue" << 2);
+    const BSONObj doc2Key = BSON("_id" << 2);
+    {
+        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+        insertDocument(autoColl.getCollection(), InsertStatement(doc2));
+    }
+    txnParticipant.commitUnpreparedTransaction(_opCtx);
+
+    txnParticipant.stashTransactionResources(_opCtx);
+    {
+        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+        const BSONObj query1 = BSON("_id" << 1);
+        const BSONObj query2 = BSON("_id" << 2);
+        const auto& coll = autoColl.getCollection();
+
+        // Collection should be empty until commit, at which point both documents
+        // should show up.
+        assertDocumentAtTimestamp(coll, presentTs, BSONObj());
+        assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
+        assertDocumentAtTimestamp(coll, firstOplogEntryTs, BSONObj());
+        assertFilteredDocumentAtTimestamp(coll, query1, commitEntryTs, doc);
+        assertFilteredDocumentAtTimestamp(coll, query2, commitEntryTs, doc2);
+        assertFilteredDocumentAtTimestamp(coll, query1, _nullTs, doc);
+        assertFilteredDocumentAtTimestamp(coll, query2, _nullTs, doc2);
+
+        // Implicit commit oplog entry should exist at commitEntryTs.
+        const auto commitFilter = BSON(
+            "ts" << commitEntryTs << "o"
+                 << BSON("applyOps" << BSON_ARRAY(BSON("op"
+                                                       << "i"
+                                                       << "ns" << nss.ns() << "ui" << coll->uuid()
+                                                       << "o" << doc2 << "o2" << doc2Key))
+                                    << "count" << 2));
+        assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, firstOplogEntryTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, true);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, _nullTs, true);
+
+        // Check that the oldestActiveTxnTimestamp properly accounts for in-progress
+        // transactions.
+        assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
+        assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
+        assertOldestActiveTxnTimestampEquals(firstOplogEntryTs, firstOplogEntryTs);
+        assertOldestActiveTxnTimestampEquals(boost::none, commitEntryTs);
+        assertOldestActiveTxnTimestampEquals(boost::none, _nullTs);
+
+        // first oplog entry should exist at firstOplogEntryTs and after it.
+        const auto firstOplogEntryFilter = BSON(
+            "ts" << firstOplogEntryTs << "o"
+                 << BSON("applyOps" << BSON_ARRAY(BSON("op"
+                                                       << "i"
+                                                       << "ns" << nss.ns() << "ui" << coll->uuid()
+                                                       << "o" << doc << "o2" << docKey))
+                                    << "partialTxn" << true));
+        assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, presentTs, false);
+        assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, beforeTxnTs, false);
+        assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, firstOplogEntryTs, true);
+        assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, commitEntryTs, true);
+        assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, _nullTs, true);
+
+        // Session state should go to inProgress at firstOplogEntryTs, then to committed
+        // at commitEntryTs
+        getSessionTxnInfoAtTimestamp(presentTs, false);
+        getSessionTxnInfoAtTimestamp(beforeTxnTs, false);
+        auto sessionInfo = getSessionTxnInfoAtTimestamp(firstOplogEntryTs, true);
+        ASSERT_EQ(sessionInfo["state"].String(), "inProgress");
+        ASSERT_EQ(sessionInfo["lastWriteOpTime"]["ts"].timestamp(), firstOplogEntryTs);
+        ASSERT_EQ(sessionInfo["startOpTime"]["ts"].timestamp(), firstOplogEntryTs);
+
+        sessionInfo = getSessionTxnInfoAtTimestamp(commitEntryTs, true);
+        ASSERT_EQ(sessionInfo["state"].String(), "committed");
+        ASSERT_EQ(sessionInfo["lastWriteOpTime"]["ts"].timestamp(), commitEntryTs);
+        ASSERT_FALSE(sessionInfo.hasField("startOpTime"));
+
+        sessionInfo = getSessionTxnInfoAtTimestamp(_nullTs, true);
+        ASSERT_EQ(sessionInfo["state"].String(), "committed");
+        ASSERT_EQ(sessionInfo["lastWriteOpTime"]["ts"].timestamp(), commitEntryTs);
+        ASSERT_FALSE(sessionInfo.hasField("startOpTime"));
+    }
+}
+
+// Prepares and then commits a multi-oplog-entry transaction (two inserts,
+// packing disabled) and verifies visibility at each phase: nothing before
+// prepare; the partialTxn and prepare applyOps entries at their own
+// timestamps after prepare (documents still hidden); documents and the
+// commitTransaction entry visible from 'commitEntryTs'; plus
+// oldestActiveTxnTimestamp and config.transactions state transitions
+// inProgress -> prepared -> committed.
+TEST_F(MultiDocumentTransactionTest, CommitPreparedMultiOplogEntryTransaction) {
+    MultiOplogScopedSettings multiOplogSettings;
+
+    // Four slots follow the current cluster time: partialTxn entry, prepare
+    // entry, the commit timestamp, and the commitTransaction oplog entry.
+    const auto currentTime = _clock->getTime();
+    const auto clusterTime = currentTime.clusterTime();
+    Timestamp firstOplogEntryTs = clusterTime.addTicks(1).asTimestamp();
+    Timestamp prepareEntryTs = clusterTime.addTicks(2).asTimestamp();
+    Timestamp commitTs = clusterTime.addTicks(3).asTimestamp();
+    Timestamp commitEntryTs = clusterTime.addTicks(4).asTimestamp();
+
+    auto txnParticipant = TransactionParticipant::get(_opCtx);
+    ASSERT(txnParticipant);
+    LOGV2(22511, "PrepareTS: {prepareEntryTs}", "prepareEntryTs"_attr = prepareEntryTs);
+    logTimestamps();
+
+    const auto prepareFilter = BSON("ts" << prepareEntryTs);
+    const auto commitFilter = BSON("ts" << commitEntryTs);
+    {
+        // Phase 1: before prepare, nothing is visible anywhere.
+        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
+        const auto& coll = autoColl.getCollection();
+        assertDocumentAtTimestamp(coll, presentTs, BSONObj());
+        assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
+        assertDocumentAtTimestamp(coll, firstOplogEntryTs, BSONObj());
+        assertDocumentAtTimestamp(coll, prepareEntryTs, BSONObj());
+        assertDocumentAtTimestamp(coll, commitEntryTs, BSONObj());
+
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, firstOplogEntryTs, false);
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareEntryTs, false);
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, commitEntryTs, false);
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, _nullTs, false);
+
+        assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, prepareEntryTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, _nullTs, false);
+
+        assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
+        assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
+        assertOldestActiveTxnTimestampEquals(boost::none, prepareEntryTs);
+        assertOldestActiveTxnTimestampEquals(boost::none, commitEntryTs);
+        assertOldestActiveTxnTimestampEquals(boost::none, _nullTs);
+    }
+    txnParticipant.unstashTransactionResources(_opCtx, "insert");
+    // Add a second insert so the transaction spans two oplog entries.
+    const BSONObj doc2 = BSON("_id" << 2 << "TestValue" << 2);
+    const BSONObj doc2Key = BSON("_id" << 2);
+    {
+        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+        insertDocument(autoColl.getCollection(), InsertStatement(doc2));
+    }
+    txnParticipant.prepareTransaction(_opCtx, {});
+
+    const BSONObj query1 = BSON("_id" << 1);
+    const BSONObj query2 = BSON("_id" << 2);
+
+    txnParticipant.stashTransactionResources(_opCtx);
+    {
+        // Phase 2: after prepare, the prepare oplog entry is visible but the
+        // documents remain hidden; reads must ignore prepare conflicts.
+        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
+        const auto& coll = autoColl.getCollection();
+        assertDocumentAtTimestamp(coll, presentTs, BSONObj());
+        assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
+        assertDocumentAtTimestamp(coll, firstOplogEntryTs, BSONObj());
+
+        {
+            IgnorePrepareBlock ignorePrepare(_opCtx);
+            // Perform the following while ignoring prepare conflicts. These calls would
+            // otherwise wait forever until the prepared transaction committed or aborted.
+            assertDocumentAtTimestamp(coll, prepareEntryTs, BSONObj());
+            assertDocumentAtTimestamp(coll, commitEntryTs, BSONObj());
+            assertDocumentAtTimestamp(coll, _nullTs, BSONObj());
+        }
+
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, firstOplogEntryTs, false);
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareEntryTs, true);
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, commitEntryTs, true);
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, _nullTs, true);
+
+        // We haven't committed the prepared transaction
+        assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, firstOplogEntryTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, prepareEntryTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, _nullTs, false);
+
+        assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
+        assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
+        assertOldestActiveTxnTimestampEquals(firstOplogEntryTs, firstOplogEntryTs);
+        assertOldestActiveTxnTimestampEquals(firstOplogEntryTs, prepareEntryTs);
+        // The transaction has not been committed yet and is still considered active.
+        assertOldestActiveTxnTimestampEquals(firstOplogEntryTs, commitEntryTs);
+        assertOldestActiveTxnTimestampEquals(firstOplogEntryTs, _nullTs);
+    }
+
+    txnParticipant.unstashTransactionResources(_opCtx, "commitTransaction");
+
+    {
+        // Bypass the majority-committed check since there is no replication.
+        FailPointEnableBlock failPointBlock("skipCommitTxnCheckPrepareMajorityCommitted");
+        txnParticipant.commitPreparedTransaction(_opCtx, commitTs, {});
+    }
+
+    txnParticipant.stashTransactionResources(_opCtx);
+    {
+        // Phase 3: after commit, both documents and all three oplog entries
+        // are visible from their respective timestamps onwards.
+        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+        const auto& coll = autoColl.getCollection();
+        assertDocumentAtTimestamp(coll, presentTs, BSONObj());
+        assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
+        assertDocumentAtTimestamp(coll, firstOplogEntryTs, BSONObj());
+        assertDocumentAtTimestamp(coll, prepareEntryTs, BSONObj());
+        assertFilteredDocumentAtTimestamp(coll, query1, commitEntryTs, doc);
+        assertFilteredDocumentAtTimestamp(coll, query2, commitEntryTs, doc2);
+        assertFilteredDocumentAtTimestamp(coll, query1, _nullTs, doc);
+        assertFilteredDocumentAtTimestamp(coll, query2, _nullTs, doc2);
+
+        // The prepare oplog entry should exist at prepareEntryTs and onwards.
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareEntryTs, true);
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, commitEntryTs, true);
+        assertOplogDocumentExistsAtTimestamp(prepareFilter, _nullTs, true);
+
+        // The commit oplog entry should exist at commitEntryTs and onwards.
+        assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, prepareEntryTs, false);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, true);
+        assertOplogDocumentExistsAtTimestamp(commitFilter, _nullTs, true);
+
+        // The first oplog entry should exist at firstOplogEntryTs and onwards.
+        const auto firstOplogEntryFilter = BSON(
+            "ts" << firstOplogEntryTs << "o"
+                 << BSON("applyOps" << BSON_ARRAY(BSON("op"
+                                                       << "i"
+                                                       << "ns" << nss.ns() << "ui" << coll->uuid()
+                                                       << "o" << doc << "o2" << docKey))
+                                    << "partialTxn" << true));
+        assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, presentTs, false);
+        assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, beforeTxnTs, false);
+        assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, firstOplogEntryTs, true);
+        assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, prepareEntryTs, true);
+        assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, commitEntryTs, true);
+        assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, _nullTs, true);
+        // The prepare oplog entry should exist at prepareEntryTs and onwards.
+        const auto prepareOplogEntryFilter = BSON(
+            "ts" << prepareEntryTs << "o"
+                 << BSON("applyOps" << BSON_ARRAY(BSON("op"
+                                                       << "i"
+                                                       << "ns" << nss.ns() << "ui" << coll->uuid()
+                                                       << "o" << doc2 << "o2" << doc2Key))
+                                    << "prepare" << true << "count" << 2));
+        assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, presentTs, false);
+        assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, beforeTxnTs, false);
+        assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, firstOplogEntryTs, false);
+        assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, prepareEntryTs, true);
+        assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, commitEntryTs, true);
+        assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, _nullTs, true);
+
+        assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
+        assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
+        assertOldestActiveTxnTimestampEquals(firstOplogEntryTs, firstOplogEntryTs);
+        assertOldestActiveTxnTimestampEquals(firstOplogEntryTs, prepareEntryTs);
+        // The transaction is no longer considered active after being committed.
+        assertOldestActiveTxnTimestampEquals(boost::none, commitEntryTs);
+        assertOldestActiveTxnTimestampEquals(boost::none, _nullTs);
+
+        // The session state should go to inProgress at firstOplogEntryTs, then to prepared at
+        // prepareEntryTs, and then finally to committed at commitEntryTs.
+        auto sessionInfo = getSessionTxnInfoAtTimestamp(firstOplogEntryTs, true);
+        ASSERT_EQ(sessionInfo["state"].String(), "inProgress");
+        ASSERT_EQ(sessionInfo["lastWriteOpTime"]["ts"].timestamp(), firstOplogEntryTs);
+        ASSERT_EQ(sessionInfo["startOpTime"]["ts"].timestamp(), firstOplogEntryTs);
+
+        sessionInfo = getSessionTxnInfoAtTimestamp(prepareEntryTs, true);
+        ASSERT_EQ(sessionInfo["state"].String(), "prepared");
+        ASSERT_EQ(sessionInfo["lastWriteOpTime"]["ts"].timestamp(), prepareEntryTs);
+        ASSERT_EQ(sessionInfo["startOpTime"]["ts"].timestamp(), firstOplogEntryTs);
+
+        sessionInfo = getSessionTxnInfoAtTimestamp(_nullTs, true);
+        ASSERT_EQ(sessionInfo["state"].String(), "committed");
+        ASSERT_EQ(sessionInfo["lastWriteOpTime"]["ts"].timestamp(), commitEntryTs);
+        ASSERT_FALSE(sessionInfo.hasField("startOpTime"));
+    }
+}
+
+TEST_F(MultiDocumentTransactionTest, AbortPreparedMultiOplogEntryTransaction) {
+ Timestamp firstOplogEntryTs, prepareEntryTs, abortEntryTs;
+ MultiOplogScopedSettings multiOplogSettings;
+
+ const auto currentTime = _clock->getTime();
+ const auto clusterTime = currentTime.clusterTime();
+ prepareEntryTs = clusterTime.addTicks(1).asTimestamp();
+ abortEntryTs = clusterTime.addTicks(2).asTimestamp();
+
+ auto txnParticipant = TransactionParticipant::get(_opCtx);
+ ASSERT(txnParticipant);
+ LOGV2(22512, "PrepareTS: {prepareEntryTs}", "prepareEntryTs"_attr = prepareEntryTs);
+ LOGV2(22513, "AbortTS: {abortEntryTs}", "abortEntryTs"_attr = abortEntryTs);
+
+ const auto prepareFilter = BSON("ts" << prepareEntryTs);
+ const auto abortFilter = BSON("ts" << abortEntryTs);
+ {
+ assertOplogDocumentExistsAtTimestamp(abortFilter, presentTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, beforeTxnTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, prepareEntryTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, abortEntryTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, _nullTs, false);
+ }
+ txnParticipant.unstashTransactionResources(_opCtx, "insert");
+
+ txnParticipant.prepareTransaction(_opCtx, {});
+
+ txnParticipant.stashTransactionResources(_opCtx);
+ {
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareEntryTs, true);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, abortEntryTs, true);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, _nullTs, true);
+
+ assertOplogDocumentExistsAtTimestamp(abortFilter, presentTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, beforeTxnTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, firstOplogEntryTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, prepareEntryTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, abortEntryTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, _nullTs, false);
+ }
+
+ txnParticipant.unstashTransactionResources(_opCtx, "abortTransaction");
+
+ txnParticipant.abortTransaction(_opCtx);
+
+ txnParticipant.stashTransactionResources(_opCtx);
+ {
+ // The prepare oplog entry should exist at prepareEntryTs and onwards.
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareEntryTs, true);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, abortEntryTs, true);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, _nullTs, true);
+
+ // The abort oplog entry should exist at abortEntryTs and onwards.
+ assertOplogDocumentExistsAtTimestamp(abortFilter, presentTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, beforeTxnTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, prepareEntryTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, abortEntryTs, true);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, _nullTs, true);
+
+ UUID ui = UUID::gen();
+ {
+ AutoGetCollection coll(_opCtx, nss, LockMode::MODE_IX);
+ ASSERT(coll);
+ ui = coll->uuid();
+ }
+
+        // The prepare oplog entry should exist at prepareEntryTs and onwards.
+ const auto prepareOplogEntryFilter =
+ BSON("ts" << prepareEntryTs << "o"
+ << BSON("applyOps" << BSON_ARRAY(BSON("op"
+ << "i"
+ << "ns" << nss.ns() << "ui" << ui << "o"
+ << doc << "o2" << docKey))
+ << "prepare" << true));
+ assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, presentTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, beforeTxnTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, prepareEntryTs, true);
+ assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, abortEntryTs, true);
+ assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, _nullTs, true);
+
+ assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
+ assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
+ assertOldestActiveTxnTimestampEquals(boost::none, abortEntryTs);
+ assertOldestActiveTxnTimestampEquals(boost::none, _nullTs);
+
+ // The session state should be "aborted" at abortEntryTs.
+ auto sessionInfo = getSessionTxnInfoAtTimestamp(abortEntryTs, true);
+ ASSERT_EQ(sessionInfo["state"].String(), "aborted");
+ ASSERT_EQ(sessionInfo["lastWriteOpTime"]["ts"].timestamp(), abortEntryTs);
+ ASSERT_FALSE(sessionInfo.hasField("startOpTime"));
+ }
+}
+
+TEST_F(MultiDocumentTransactionTest, PreparedMultiDocumentTransaction) {
+ auto txnParticipant = TransactionParticipant::get(_opCtx);
+ ASSERT(txnParticipant);
+
+ const auto currentTime = _clock->getTime();
+ const auto clusterTime = currentTime.clusterTime();
+ const auto prepareTs = clusterTime.addTicks(1).asTimestamp();
+ const auto commitTs = clusterTime.addTicks(2).asTimestamp();
+ commitEntryTs = clusterTime.addTicks(3).asTimestamp();
+ LOGV2(22514, "Prepare TS: {prepareTs}", "prepareTs"_attr = prepareTs);
+ logTimestamps();
+
+ {
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
+ const auto& coll = autoColl.getCollection();
+ assertDocumentAtTimestamp(coll, prepareTs, BSONObj());
+ assertDocumentAtTimestamp(coll, commitEntryTs, BSONObj());
+
+ const auto prepareFilter = BSON("ts" << prepareTs);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, commitEntryTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, _nullTs, false);
+
+ const auto commitFilter = BSON("ts" << commitEntryTs);
+ assertOplogDocumentExistsAtTimestamp(commitFilter, prepareTs, false);
+ assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, false);
+ }
+ txnParticipant.unstashTransactionResources(_opCtx, "insert");
+
+ txnParticipant.prepareTransaction(_opCtx, {});
+
+ txnParticipant.stashTransactionResources(_opCtx);
+ assertHasStartOpTime();
+ {
+ const auto prepareFilter = BSON("ts" << prepareTs);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareTs, true);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, commitEntryTs, true);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, _nullTs, true);
+
+ const auto commitFilter = BSON("ts" << commitEntryTs);
+ assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
+ assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
+ assertOplogDocumentExistsAtTimestamp(commitFilter, prepareTs, false);
+ assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, false);
+ assertOplogDocumentExistsAtTimestamp(commitFilter, _nullTs, false);
+
+ assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
+ assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
+ assertOldestActiveTxnTimestampEquals(prepareTs, prepareTs);
+ assertOldestActiveTxnTimestampEquals(prepareTs, _nullTs);
+ assertOldestActiveTxnTimestampEquals(prepareTs, commitEntryTs);
+ }
+ txnParticipant.unstashTransactionResources(_opCtx, "commitTransaction");
+
+ {
+ FailPointEnableBlock failPointBlock("skipCommitTxnCheckPrepareMajorityCommitted");
+ txnParticipant.commitPreparedTransaction(_opCtx, commitTs, {});
+ }
+
+ assertNoStartOpTime();
+
+ txnParticipant.stashTransactionResources(_opCtx);
+ {
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+ const auto& coll = autoColl.getCollection();
+ assertDocumentAtTimestamp(coll, presentTs, BSONObj());
+ assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
+ assertDocumentAtTimestamp(coll, prepareTs, BSONObj());
+ assertDocumentAtTimestamp(coll, commitEntryTs, doc);
+ assertDocumentAtTimestamp(coll, _nullTs, doc);
+
+ const auto prepareFilter = BSON("ts" << prepareTs);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareTs, true);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, commitEntryTs, true);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, _nullTs, true);
+
+ const auto commitFilter = BSON("ts" << commitEntryTs);
+ assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
+ assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
+ assertOplogDocumentExistsAtTimestamp(commitFilter, prepareTs, false);
+ assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, true);
+ assertOplogDocumentExistsAtTimestamp(commitFilter, _nullTs, true);
+
+ assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
+ assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
+ assertOldestActiveTxnTimestampEquals(prepareTs, prepareTs);
+ assertOldestActiveTxnTimestampEquals(boost::none, commitEntryTs);
+ assertOldestActiveTxnTimestampEquals(boost::none, _nullTs);
+ }
+}
+
+TEST_F(MultiDocumentTransactionTest, AbortedPreparedMultiDocumentTransaction) {
+ auto txnParticipant = TransactionParticipant::get(_opCtx);
+ ASSERT(txnParticipant);
+
+ const auto currentTime = _clock->getTime();
+ const auto clusterTime = currentTime.clusterTime();
+ const auto prepareTs = clusterTime.addTicks(1).asTimestamp();
+ const auto abortEntryTs = clusterTime.addTicks(2).asTimestamp();
+ LOGV2(22515, "Prepare TS: {prepareTs}", "prepareTs"_attr = prepareTs);
+ logTimestamps();
+
+ {
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
+ const auto& coll = autoColl.getCollection();
+ assertDocumentAtTimestamp(coll, prepareTs, BSONObj());
+ assertDocumentAtTimestamp(coll, abortEntryTs, BSONObj());
+
+ const auto prepareFilter = BSON("ts" << prepareTs);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, abortEntryTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, _nullTs, false);
+
+ const auto commitFilter = BSON("ts" << abortEntryTs);
+ assertOplogDocumentExistsAtTimestamp(commitFilter, prepareTs, false);
+ assertOplogDocumentExistsAtTimestamp(commitFilter, abortEntryTs, false);
+ }
+ txnParticipant.unstashTransactionResources(_opCtx, "insert");
+
+ txnParticipant.prepareTransaction(_opCtx, {});
+
+ txnParticipant.stashTransactionResources(_opCtx);
+ assertHasStartOpTime();
+ {
+ const auto prepareFilter = BSON("ts" << prepareTs);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareTs, true);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, abortEntryTs, true);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, _nullTs, true);
+
+ const auto abortFilter = BSON("ts" << abortEntryTs);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, presentTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, beforeTxnTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, prepareTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, abortEntryTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, _nullTs, false);
+
+ assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
+ assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
+ assertOldestActiveTxnTimestampEquals(prepareTs, prepareTs);
+ assertOldestActiveTxnTimestampEquals(prepareTs, _nullTs);
+ assertOldestActiveTxnTimestampEquals(prepareTs, abortEntryTs);
+ }
+ txnParticipant.unstashTransactionResources(_opCtx, "abortTransaction");
+
+ txnParticipant.abortTransaction(_opCtx);
+ assertNoStartOpTime();
+
+ txnParticipant.stashTransactionResources(_opCtx);
+ {
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
+ const auto& coll = autoColl.getCollection();
+ assertDocumentAtTimestamp(coll, presentTs, BSONObj());
+ assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
+ assertDocumentAtTimestamp(coll, prepareTs, BSONObj());
+ assertDocumentAtTimestamp(coll, abortEntryTs, BSONObj());
+ assertDocumentAtTimestamp(coll, _nullTs, BSONObj());
+
+ const auto prepareFilter = BSON("ts" << prepareTs);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareTs, true);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, abortEntryTs, true);
+ assertOplogDocumentExistsAtTimestamp(prepareFilter, _nullTs, true);
+
+ const auto abortFilter = BSON("ts" << abortEntryTs);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, presentTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, beforeTxnTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, prepareTs, false);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, abortEntryTs, true);
+ assertOplogDocumentExistsAtTimestamp(abortFilter, _nullTs, true);
+
+ assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
+ assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
+ assertOldestActiveTxnTimestampEquals(prepareTs, prepareTs);
+ assertOldestActiveTxnTimestampEquals(boost::none, abortEntryTs);
+ assertOldestActiveTxnTimestampEquals(boost::none, _nullTs);
+ }
+}
+} // namespace mongo
diff --git a/src/mongo/db/service_context_d_test_fixture.cpp b/src/mongo/db/service_context_d_test_fixture.cpp
index 62bf0f453e6..c5aaed6c870 100644
--- a/src/mongo/db/service_context_d_test_fixture.cpp
+++ b/src/mongo/db/service_context_d_test_fixture.cpp
@@ -41,9 +41,11 @@
#include "mongo/db/catalog/collection_impl.h"
#include "mongo/db/catalog/database_holder_impl.h"
#include "mongo/db/concurrency/d_concurrency.h"
+#include "mongo/db/global_settings.h"
#include "mongo/db/index/index_access_method_factory_impl.h"
#include "mongo/db/index_builds_coordinator_mongod.h"
#include "mongo/db/op_observer_registry.h"
+#include "mongo/db/repl/repl_settings.h"
#include "mongo/db/s/collection_sharding_state_factory_shard.h"
#include "mongo/db/service_entry_point_mongod.h"
#include "mongo/db/storage/control/storage_control.h"
@@ -51,6 +53,7 @@
#include "mongo/db/storage/storage_options.h"
#include "mongo/logv2/log.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/clock_source_mock.h"
#include "mongo/util/periodic_runner_factory.h"
namespace mongo {
@@ -63,9 +66,18 @@ ServiceContextMongoDTest::ServiceContextMongoDTest(std::string engine)
ServiceContextMongoDTest::ServiceContextMongoDTest(std::string engine,
RepairAction repair,
- StorageEngineInitFlags initFlags)
+ StorageEngineInitFlags initFlags,
+ bool useReplSettings,
+ bool useMockClock)
: _tempDir("service_context_d_test_fixture") {
+ if (useReplSettings) {
+ repl::ReplSettings replSettings;
+ replSettings.setOplogSizeBytes(10 * 1024 * 1024);
+ replSettings.setReplSetString("rs0");
+ setGlobalReplSettings(replSettings);
+ }
+
_stashedStorageParams.engine = std::exchange(storageGlobalParams.engine, std::move(engine));
_stashedStorageParams.engineSetByUser =
std::exchange(storageGlobalParams.engineSetByUser, true);
@@ -83,6 +95,25 @@ ServiceContextMongoDTest::ServiceContextMongoDTest(std::string engine,
}
auto const serviceContext = getServiceContext();
+ if (useMockClock) {
+ // Copied from dbtests.cpp. DBTests sets up a controlled mock clock while
+ // ServiceContextMongoDTest uses the system clock. Tests moved from dbtests to unittests may
+ // depend on a deterministic clock.
+ auto fastClock = std::make_unique<ClockSourceMock>();
+ // Timestamps are split into two 32-bit integers, seconds and "increments". Currently
+ // (but maybe not for eternity), a Timestamp with a value of `0` seconds is always
+ // considered "null" by `Timestamp::isNull`, regardless of its increment value. Ticking
+ // the `ClockSourceMock` only bumps the "increment" counter, thus by default, generating
+ // "null" timestamps. Bumping by one second here avoids any accidental interpretations.
+ fastClock->advance(Seconds(1));
+ serviceContext->setFastClockSource(std::move(fastClock));
+
+ auto preciseClock = std::make_unique<ClockSourceMock>();
+ // See above.
+ preciseClock->advance(Seconds(1));
+ serviceContext->setPreciseClockSource(std::move(preciseClock));
+ }
+
serviceContext->setServiceEntryPoint(std::make_unique<ServiceEntryPointMongod>(serviceContext));
// Set up the periodic runner to allow background job execution for tests that require it.
diff --git a/src/mongo/db/service_context_d_test_fixture.h b/src/mongo/db/service_context_d_test_fixture.h
index 55ab1bb28c9..539bf087634 100644
--- a/src/mongo/db/service_context_d_test_fixture.h
+++ b/src/mongo/db/service_context_d_test_fixture.h
@@ -54,7 +54,9 @@ protected:
explicit ServiceContextMongoDTest(std::string engine);
ServiceContextMongoDTest(std::string engine,
RepairAction repair,
- StorageEngineInitFlags initFlags = kDefaultStorageEngineInitFlags);
+ StorageEngineInitFlags initFlags = kDefaultStorageEngineInitFlags,
+ bool useReplSettings = false,
+ bool useMockClock = false);
virtual ~ServiceContextMongoDTest();
void tearDown() override;
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index 2c6e9e6f29f..2f17cb04310 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -2139,7 +2139,7 @@ WiredTigerRecordStoreCursorBase::WiredTigerRecordStoreCursorBase(OperationContex
bool forward)
: _rs(rs), _opCtx(opCtx), _forward(forward) {
if (_rs._isOplog) {
- _oplogVisibleTs = WiredTigerRecoveryUnit::get(opCtx)->getOplogVisibilityTs();
+ initOplogVisibility(_opCtx);
}
_cursor.emplace(rs.getURI(), rs.tableId(), true, opCtx);
}
@@ -2176,6 +2176,27 @@ boost::optional<Record> WiredTigerRecordStoreCursorBase::next() {
id = getKey(c);
}
+ // If we're using a read timestamp and we're a reverse cursor positioned outside of that bound,
+ // walk backwards until we find a suitable record. This is exercised when doing a reverse
+ // natural order collection scan.
+ if (_readTimestampForOplog && !_forward) {
+ invariant(_rs._isOplog);
+ while (id.getLong() > *_readTimestampForOplog) {
+ int advanceRet = wiredTigerPrepareConflictRetry(_opCtx, [&] { return c->prev(c); });
+ if (advanceRet == WT_NOTFOUND) {
+ _eof = true;
+ return {};
+ }
+ invariantWTOK(advanceRet, c->session);
+ id = getKey(c);
+ }
+ }
+
+ if (_readTimestampForOplog && id.getLong() > *_readTimestampForOplog) {
+ _eof = true;
+ return {};
+ }
+
if (_forward && _oplogVisibleTs && id.getLong() > *_oplogVisibleTs) {
_eof = true;
return {};
@@ -2211,6 +2232,11 @@ boost::optional<Record> WiredTigerRecordStoreCursorBase::next() {
boost::optional<Record> WiredTigerRecordStoreCursorBase::seekExact(const RecordId& id) {
invariant(_hasRestored);
+ if (_readTimestampForOplog && id.getLong() > *_readTimestampForOplog) {
+ _eof = true;
+ return {};
+ }
+
if (_forward && _oplogVisibleTs && id.getLong() > *_oplogVisibleTs) {
_eof = true;
return {};
@@ -2250,8 +2276,13 @@ boost::optional<Record> WiredTigerRecordStoreCursorBase::seekExact(const RecordI
boost::optional<Record> WiredTigerRecordStoreCursorBase::seekNear(const RecordId& id) {
dassert(_opCtx->lockState()->isReadLocked());
- // Forward scans on the oplog must round down to the oplog visibility timestamp.
+ // Oplog queries must manually implement read_timestamp visibility.
RecordId start = id;
+ if (_readTimestampForOplog && start.getLong() > *_readTimestampForOplog) {
+ start = RecordId(*_readTimestampForOplog);
+ }
+
+ // Additionally, forward scanning oplog cursors must not see past holes.
if (_forward && _oplogVisibleTs && start.getLong() > *_oplogVisibleTs) {
start = RecordId(*_oplogVisibleTs);
}
@@ -2299,8 +2330,12 @@ boost::optional<Record> WiredTigerRecordStoreCursorBase::seekNear(const RecordId
curId = getKey(c);
- // For forward cursors on the oplog, the oplog visible timestamp is treated as the end of the
- // record store. So if we are positioned past this point, then there are no visible records.
+ // After we've positioned to the first document to return, apply visibility rules again.
+ if (_readTimestampForOplog && curId.getLong() > *_readTimestampForOplog) {
+ _eof = true;
+ return boost::none;
+ }
+
if (_forward && _oplogVisibleTs && curId.getLong() > *_oplogVisibleTs) {
_eof = true;
return boost::none;
@@ -2322,6 +2357,7 @@ void WiredTigerRecordStoreCursorBase::save() {
if (_cursor)
_cursor->reset();
_oplogVisibleTs = boost::none;
+ _readTimestampForOplog = boost::none;
_hasRestored = false;
} catch (const WriteConflictException&) {
// Ignore since this is only called when we are about to kill our transaction
@@ -2329,16 +2365,28 @@ void WiredTigerRecordStoreCursorBase::save() {
}
}
+void WiredTigerRecordStoreCursorBase::initOplogVisibility(OperationContext* opCtx) {
+ auto wtRu = WiredTigerRecoveryUnit::get(opCtx);
+ wtRu->setIsOplogReader();
+ if (_forward) {
+ _oplogVisibleTs = wtRu->getOplogVisibilityTs();
+ }
+ boost::optional<Timestamp> readTs = wtRu->getPointInTimeReadTimestamp(opCtx);
+ if (readTs && readTs->asLL() != 0) {
+        // One cannot pass a read_timestamp of 0 to WT, but a "0" is commonly understood to mean
+        // that every timestamp is visible.
+ _readTimestampForOplog = readTs->asInt64();
+ }
+}
+
void WiredTigerRecordStoreCursorBase::saveUnpositioned() {
save();
_lastReturnedId = RecordId();
}
bool WiredTigerRecordStoreCursorBase::restore(bool tolerateCappedRepositioning) {
- if (_rs._isOplog && _forward) {
- auto wtRu = WiredTigerRecoveryUnit::get(_opCtx);
- wtRu->setIsOplogReader();
- _oplogVisibleTs = wtRu->getOplogVisibilityTs();
+ if (_rs._isOplog) {
+ initOplogVisibility(_opCtx);
}
if (!_cursor)
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
index f2ab84e3f67..48ed14ea570 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
@@ -432,6 +432,7 @@ protected:
private:
bool isVisible(const RecordId& id);
+ void initOplogVisibility(OperationContext* opCtx);
/**
* This value is used for visibility calculations on what oplog entries can be returned to a
@@ -439,6 +440,15 @@ private:
* established.
*/
boost::optional<std::int64_t> _oplogVisibleTs = boost::none;
+
+ /**
+ * With WT-8601, WiredTiger no longer maintains commit_timestamp information on writes to logged
+     * tables, such as the oplog. There are occasions where the server applies a timestamp
+     * ReadSource (e.g., majority) and expects the storage engine to obey the prior semantics. When a cursor is
+ * opened against the oplog, we will populate this variable with the recovery unit's read
+ * timestamp to apply a visibility check.
+ */
+ boost::optional<std::int64_t> _readTimestampForOplog = boost::none;
bool _saveStorageCursorOnDetachFromOperationContext = false;
};
diff --git a/src/mongo/dbtests/SConscript b/src/mongo/dbtests/SConscript
index 9074965c16f..3fe761cb0b9 100644
--- a/src/mongo/dbtests/SConscript
+++ b/src/mongo/dbtests/SConscript
@@ -125,7 +125,6 @@ env.Program(
'repltests.cpp',
'rollbacktests.cpp',
'socktests.cpp',
- 'storage_timestamp_tests.cpp',
'storage_debug_util.cpp',
'threadedtests.cpp',
'updatetests.cpp',
@@ -150,7 +149,6 @@ env.Program(
"$BUILD_DIR/mongo/db/exec/document_value/document_value_test_util",
"$BUILD_DIR/mongo/db/index/index_access_methods",
"$BUILD_DIR/mongo/db/index/index_build_interceptor",
- "$BUILD_DIR/mongo/db/index/skipped_record_tracker",
"$BUILD_DIR/mongo/db/logical_time_metadata_hook",
"$BUILD_DIR/mongo/db/mongohasher",
"$BUILD_DIR/mongo/db/multitenancy",
@@ -169,7 +167,6 @@ env.Program(
"$BUILD_DIR/mongo/db/repl/replmocks",
"$BUILD_DIR/mongo/db/repl/serveronly_repl",
"$BUILD_DIR/mongo/db/repl/storage_interface_impl",
- "$BUILD_DIR/mongo/db/repl/timestamp_block",
"$BUILD_DIR/mongo/db/server_options_core",
"$BUILD_DIR/mongo/db/sessions_collection_standalone",
"$BUILD_DIR/mongo/db/storage/durable_catalog_impl",
diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp
deleted file mode 100644
index 79390ab1f2a..00000000000
--- a/src/mongo/dbtests/storage_timestamp_tests.cpp
+++ /dev/null
@@ -1,4416 +0,0 @@
-/**
- * Copyright (C) 2018-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
-#include "mongo/platform/basic.h"
-
-#include <cstdint>
-
-#include "mongo/bson/bsonmisc.h"
-#include "mongo/bson/mutable/algorithm.h"
-#include "mongo/bson/simple_bsonobj_comparator.h"
-#include "mongo/bson/timestamp.h"
-#include "mongo/db/catalog/collection.h"
-#include "mongo/db/catalog/collection_catalog.h"
-#include "mongo/db/catalog/create_collection.h"
-#include "mongo/db/catalog/document_validation.h"
-#include "mongo/db/catalog/drop_database.h"
-#include "mongo/db/catalog/drop_indexes.h"
-#include "mongo/db/catalog/index_catalog.h"
-#include "mongo/db/catalog/multi_index_block.h"
-#include "mongo/db/client.h"
-#include "mongo/db/concurrency/write_conflict_exception.h"
-#include "mongo/db/db_raii.h"
-#include "mongo/db/dbdirectclient.h"
-#include "mongo/db/dbhelpers.h"
-#include "mongo/db/global_settings.h"
-#include "mongo/db/index/index_build_interceptor.h"
-#include "mongo/db/index/index_descriptor.h"
-#include "mongo/db/index/wildcard_access_method.h"
-#include "mongo/db/index_builds_coordinator.h"
-#include "mongo/db/multi_key_path_tracker.h"
-#include "mongo/db/op_observer_impl.h"
-#include "mongo/db/op_observer_registry.h"
-#include "mongo/db/query/wildcard_multikey_paths.h"
-#include "mongo/db/repl/apply_ops.h"
-#include "mongo/db/repl/drop_pending_collection_reaper.h"
-#include "mongo/db/repl/multiapplier.h"
-#include "mongo/db/repl/oplog.h"
-#include "mongo/db/repl/oplog_applier.h"
-#include "mongo/db/repl/oplog_applier_impl.h"
-#include "mongo/db/repl/oplog_applier_impl_test_fixture.h"
-#include "mongo/db/repl/oplog_entry.h"
-#include "mongo/db/repl/oplog_entry_test_helpers.h"
-#include "mongo/db/repl/optime.h"
-#include "mongo/db/repl/repl_client_info.h"
-#include "mongo/db/repl/replication_consistency_markers_impl.h"
-#include "mongo/db/repl/replication_consistency_markers_mock.h"
-#include "mongo/db/repl/replication_coordinator.h"
-#include "mongo/db/repl/replication_coordinator_mock.h"
-#include "mongo/db/repl/replication_process.h"
-#include "mongo/db/repl/replication_recovery_mock.h"
-#include "mongo/db/repl/storage_interface_impl.h"
-#include "mongo/db/repl/timestamp_block.h"
-#include "mongo/db/s/collection_sharding_state_factory_shard.h"
-#include "mongo/db/service_context.h"
-#include "mongo/db/session.h"
-#include "mongo/db/session_catalog_mongod.h"
-#include "mongo/db/storage/snapshot_manager.h"
-#include "mongo/db/storage/storage_engine_impl.h"
-#include "mongo/db/tenant_namespace.h"
-#include "mongo/db/transaction_participant.h"
-#include "mongo/db/transaction_participant_gen.h"
-#include "mongo/db/vector_clock_mutable.h"
-#include "mongo/dbtests/dbtests.h"
-#include "mongo/idl/server_parameter_test_util.h"
-#include "mongo/logv2/log.h"
-#include "mongo/rpc/get_status_from_command_result.h"
-#include "mongo/stdx/future.h"
-#include "mongo/unittest/unittest.h"
-#include "mongo/util/fail_point.h"
-#include "mongo/util/stacktrace.h"
-
-namespace mongo {
-namespace {
-
-/**
- * RAII type for operating at a timestamp. Will remove any timestamping when the object destructs.
- */
-class OneOffRead {
-public:
- OneOffRead(OperationContext* opCtx, const Timestamp& ts) : _opCtx(opCtx) {
- _opCtx->recoveryUnit()->abandonSnapshot();
- if (ts.isNull()) {
- _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
- } else {
- _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided, ts);
- }
- }
-
- ~OneOffRead() {
- _opCtx->recoveryUnit()->abandonSnapshot();
- _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
- }
-
-private:
- OperationContext* _opCtx;
-};
-
-class IgnorePrepareBlock {
-public:
- IgnorePrepareBlock(OperationContext* opCtx) : _opCtx(opCtx) {
- _opCtx->recoveryUnit()->abandonSnapshot();
- _opCtx->recoveryUnit()->setPrepareConflictBehavior(
- PrepareConflictBehavior::kIgnoreConflicts);
- }
-
- ~IgnorePrepareBlock() {
- _opCtx->recoveryUnit()->abandonSnapshot();
- _opCtx->recoveryUnit()->setPrepareConflictBehavior(PrepareConflictBehavior::kEnforce);
- }
-
-private:
- OperationContext* _opCtx;
-};
-} // namespace
-
-const auto kIndexVersion = IndexDescriptor::IndexVersion::kV2;
-
-void assertIndexMetaDataMissing(std::shared_ptr<BSONCollectionCatalogEntry::MetaData> collMetaData,
- StringData indexName) {
- const auto idxOffset = collMetaData->findIndexOffset(indexName);
- ASSERT_EQUALS(-1, idxOffset) << indexName << ". Collection Metdata: " << collMetaData->toBSON();
-}
-
-BSONCollectionCatalogEntry::IndexMetaData getIndexMetaData(
- std::shared_ptr<BSONCollectionCatalogEntry::MetaData> collMetaData, StringData indexName) {
- const auto idxOffset = collMetaData->findIndexOffset(indexName);
- ASSERT_GT(idxOffset, -1) << indexName;
- return collMetaData->indexes[idxOffset];
-}
-
-class DoNothingOplogApplierObserver : public repl::OplogApplier::Observer {
-public:
- void onBatchBegin(const std::vector<repl::OplogEntry>&) final {}
- void onBatchEnd(const StatusWith<repl::OpTime>&, const std::vector<repl::OplogEntry>&) final {}
-};
-
-class StorageTimestampTest {
-public:
- ServiceContext::UniqueOperationContext _opCtxRaii = cc().makeOperationContext();
- OperationContext* _opCtx = _opCtxRaii.get();
- VectorClockMutable* _clock = VectorClockMutable::get(_opCtx);
-
- // Set up Timestamps in the past, present, and future.
- const LogicalTime pastLt = _clock->tickClusterTime(1);
- const Timestamp pastTs = pastLt.asTimestamp();
- const LogicalTime presentLt = _clock->tickClusterTime(1);
- const Timestamp presentTs = presentLt.asTimestamp();
- const LogicalTime futureLt = presentLt.addTicks(1);
- const Timestamp futureTs = futureLt.asTimestamp();
- const Timestamp nullTs = Timestamp();
- const int presentTerm = 1;
- repl::ReplicationCoordinatorMock* _coordinatorMock;
- repl::ReplicationConsistencyMarkers* _consistencyMarkers;
-
- StorageTimestampTest() {
- repl::ReplSettings replSettings;
- replSettings.setOplogSizeBytes(10 * 1024 * 1024);
- replSettings.setReplSetString("rs0");
- setGlobalReplSettings(replSettings);
- auto coordinatorMock =
- new repl::ReplicationCoordinatorMock(_opCtx->getServiceContext(), replSettings);
- _coordinatorMock = coordinatorMock;
- coordinatorMock->alwaysAllowWrites(true);
- repl::ReplicationCoordinator::set(
- _opCtx->getServiceContext(),
- std::unique_ptr<repl::ReplicationCoordinator>(coordinatorMock));
- repl::StorageInterface::set(_opCtx->getServiceContext(),
- std::make_unique<repl::StorageInterfaceImpl>());
-
- auto replicationProcess = new repl::ReplicationProcess(
- repl::StorageInterface::get(_opCtx->getServiceContext()),
- std::make_unique<repl::ReplicationConsistencyMarkersMock>(),
- std::make_unique<repl::ReplicationRecoveryMock>());
- repl::ReplicationProcess::set(
- cc().getServiceContext(),
- std::unique_ptr<repl::ReplicationProcess>(replicationProcess));
-
- _consistencyMarkers =
- repl::ReplicationProcess::get(cc().getServiceContext())->getConsistencyMarkers();
-
- // Since the Client object persists across tests, even though the global
- // ReplicationCoordinator does not, we need to clear the last op associated with the client
- // to avoid the invariant in ReplClientInfo::setLastOp that the optime only goes forward.
- repl::ReplClientInfo::forClient(_opCtx->getClient()).clearLastOp();
-
- auto registry = std::make_unique<OpObserverRegistry>();
- registry->addObserver(std::make_unique<OpObserverImpl>());
- _opCtx->getServiceContext()->setOpObserver(std::move(registry));
-
- repl::createOplog(_opCtx);
-
- _clock->tickClusterTimeTo(LogicalTime(Timestamp(1, 0)));
-
- ASSERT_EQUALS(presentTs, pastLt.addTicks(1).asTimestamp());
- setReplCoordAppliedOpTime(repl::OpTime(presentTs, presentTerm));
- }
-
- ~StorageTimestampTest() {
- try {
- reset(NamespaceString("local.oplog.rs"));
- } catch (...) {
- FAIL("Exception while cleaning up test");
- }
- }
-
- /**
- * Walking on ice: resetting the ReplicationCoordinator destroys the underlying
- * `DropPendingCollectionReaper`. Use a truncate/dropAllIndexes to clean out a collection
- * without actually dropping it.
- */
- void reset(NamespaceString nss) const {
- ::mongo::writeConflictRetry(_opCtx, "deleteAll", nss.ns(), [&] {
- _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
- _opCtx->recoveryUnit()->abandonSnapshot();
- AutoGetCollection collRaii(_opCtx, nss, LockMode::MODE_X);
-
- if (collRaii) {
- WriteUnitOfWork wunit(_opCtx);
- invariant(collRaii.getWritableCollection(_opCtx)->truncate(_opCtx).isOK());
- if (_opCtx->recoveryUnit()->getCommitTimestamp().isNull()) {
- ASSERT_OK(_opCtx->recoveryUnit()->setTimestamp(Timestamp(1, 1)));
- }
- collRaii.getWritableCollection(_opCtx)->getIndexCatalog()->dropAllIndexes(
- _opCtx, collRaii.getWritableCollection(_opCtx), false);
- wunit.commit();
- return;
- }
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- auto db = autoColl.ensureDbExists(_opCtx);
- WriteUnitOfWork wunit(_opCtx);
- if (_opCtx->recoveryUnit()->getCommitTimestamp().isNull()) {
- ASSERT_OK(_opCtx->recoveryUnit()->setTimestamp(Timestamp(1, 1)));
- }
- invariant(db->createCollection(_opCtx, nss));
- wunit.commit();
- });
- }
-
- void insertDocument(const CollectionPtr& coll, const InsertStatement& stmt) {
- // Insert some documents.
- OpDebug* const nullOpDebug = nullptr;
- const bool fromMigrate = false;
- ASSERT_OK(coll->insertDocument(_opCtx, stmt, nullOpDebug, fromMigrate));
- }
-
- void createIndex(CollectionWriter& coll, std::string indexName, const BSONObj& indexKey) {
-
- // Build an index.
- MultiIndexBlock indexer;
- ScopeGuard abortOnExit(
- [&] { indexer.abortIndexBuild(_opCtx, coll, MultiIndexBlock::kNoopOnCleanUpFn); });
-
- BSONObj indexInfoObj;
- {
- auto swIndexInfoObj =
- indexer.init(_opCtx,
- coll,
- {BSON("v" << 2 << "name" << indexName << "key" << indexKey)},
- MultiIndexBlock::makeTimestampedIndexOnInitFn(_opCtx, coll.get()));
- ASSERT_OK(swIndexInfoObj.getStatus());
- indexInfoObj = std::move(swIndexInfoObj.getValue()[0]);
- }
-
- ASSERT_OK(indexer.insertAllDocumentsInCollection(_opCtx, coll.get()));
- ASSERT_OK(indexer.checkConstraints(_opCtx, coll.get()));
-
- {
- WriteUnitOfWork wuow(_opCtx);
- // Timestamping index completion. Primaries write an oplog entry.
- ASSERT_OK(
- indexer.commit(_opCtx,
- coll.getWritableCollection(),
- [&](const BSONObj& indexSpec) {
- _opCtx->getServiceContext()->getOpObserver()->onCreateIndex(
- _opCtx, coll->ns(), coll->uuid(), indexSpec, false);
- },
- MultiIndexBlock::kNoopOnCommitFn));
- // The timestamping repsponsibility is placed on the caller rather than the
- // MultiIndexBlock.
- wuow.commit();
- }
- abortOnExit.dismiss();
- }
-
- std::int32_t itCount(const CollectionPtr& coll) {
- std::uint64_t ret = 0;
- auto cursor = coll->getRecordStore()->getCursor(_opCtx);
- while (cursor->next() != boost::none) {
- ++ret;
- }
-
- return ret;
- }
-
- BSONObj findOne(const CollectionPtr& coll) {
- auto optRecord = coll->getRecordStore()->getCursor(_opCtx)->next();
- if (optRecord == boost::none) {
- // Print a stack trace to help disambiguate which `findOne` failed.
- printStackTrace();
- FAIL("Did not find any documents.");
- }
- return optRecord.get().data.toBson();
- }
-
- std::shared_ptr<BSONCollectionCatalogEntry::MetaData> getMetaDataAtTime(
- DurableCatalog* durableCatalog, RecordId catalogId, const Timestamp& ts) {
- OneOffRead oor(_opCtx, ts);
- return durableCatalog->getMetaData(_opCtx, catalogId);
- }
-
- StatusWith<BSONObj> doAtomicApplyOps(const std::string& dbName,
- const std::list<BSONObj>& applyOpsList) {
- OneOffRead oor(_opCtx, Timestamp::min());
-
- BSONObjBuilder result;
- Status status = applyOps(_opCtx,
- dbName,
- BSON("applyOps" << applyOpsList),
- repl::OplogApplication::Mode::kApplyOpsCmd,
- &result);
- if (!status.isOK()) {
- return status;
- }
-
- return {result.obj()};
- }
-
- // Creates a dummy command operation to persuade `applyOps` to be non-atomic.
- StatusWith<BSONObj> doNonAtomicApplyOps(const std::string& dbName,
- const std::list<BSONObj>& applyOpsList) {
- OneOffRead oor(_opCtx, Timestamp::min());
-
- BSONObjBuilder result;
- Status status = applyOps(_opCtx,
- dbName,
- BSON("applyOps" << applyOpsList << "allowAtomic" << false),
- repl::OplogApplication::Mode::kApplyOpsCmd,
- &result);
- if (!status.isOK()) {
- return status;
- }
-
- return {result.obj()};
- }
-
- BSONObj queryCollection(NamespaceString nss, const BSONObj& query) {
- BSONObj ret;
- ASSERT_TRUE(Helpers::findOne(
- _opCtx, AutoGetCollectionForRead(_opCtx, nss).getCollection(), query, ret))
- << "Query: " << query;
- return ret;
- }
-
- BSONObj queryOplog(const BSONObj& query) {
- OneOffRead oor(_opCtx, Timestamp::min());
- return queryCollection(NamespaceString::kRsOplogNamespace, query);
- }
-
- void assertMinValidDocumentAtTimestamp(const NamespaceString& nss,
- const Timestamp& ts,
- const repl::MinValidDocument& expectedDoc) {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- const CollectionPtr& coll = autoColl.getCollection();
-
- OneOffRead oor(_opCtx, ts);
-
- auto doc =
- repl::MinValidDocument::parse(IDLParserErrorContext("MinValidDocument"), findOne(coll));
- ASSERT_EQ(expectedDoc.getMinValidTimestamp(), doc.getMinValidTimestamp())
- << "minValid timestamps weren't equal at " << ts.toString()
- << ". Expected: " << expectedDoc.toBSON() << ". Found: " << doc.toBSON();
- ASSERT_EQ(expectedDoc.getMinValidTerm(), doc.getMinValidTerm())
- << "minValid terms weren't equal at " << ts.toString()
- << ". Expected: " << expectedDoc.toBSON() << ". Found: " << doc.toBSON();
- ASSERT_EQ(expectedDoc.getAppliedThrough(), doc.getAppliedThrough())
- << "appliedThrough OpTimes weren't equal at " << ts.toString()
- << ". Expected: " << expectedDoc.toBSON() << ". Found: " << doc.toBSON();
- ASSERT_EQ(expectedDoc.getInitialSyncFlag(), doc.getInitialSyncFlag())
- << "Initial sync flags weren't equal at " << ts.toString()
- << ". Expected: " << expectedDoc.toBSON() << ". Found: " << doc.toBSON();
- }
-
- void assertDocumentAtTimestamp(const CollectionPtr& coll,
- const Timestamp& ts,
- const BSONObj& expectedDoc) {
- OneOffRead oor(_opCtx, ts);
- if (expectedDoc.isEmpty()) {
- ASSERT_EQ(0, itCount(coll))
- << "Should not find any documents in " << coll->ns() << " at ts: " << ts;
- } else {
- ASSERT_EQ(1, itCount(coll))
- << "Should find one document in " << coll->ns() << " at ts: " << ts;
- auto doc = findOne(coll);
- ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(doc, expectedDoc))
- << "Doc: " << doc.toString() << " Expected: " << expectedDoc.toString();
- }
- }
-
- void assertFilteredDocumentAtTimestamp(const CollectionPtr& coll,
- const BSONObj& query,
- const Timestamp& ts,
- boost::optional<const BSONObj&> expectedDoc) {
- OneOffRead oor(_opCtx, ts);
- BSONObj doc;
- bool found = Helpers::findOne(_opCtx, coll, query, doc);
- if (!expectedDoc) {
- ASSERT_FALSE(found) << "Should not find any documents in " << coll->ns() << " matching "
- << query << " at ts: " << ts;
- } else {
- ASSERT(found) << "Should find document in " << coll->ns() << " matching " << query
- << " at ts: " << ts;
- ASSERT_BSONOBJ_EQ(doc, *expectedDoc);
- }
- }
-
- void assertOplogDocumentExistsAtTimestamp(const BSONObj& query,
- const Timestamp& ts,
- bool exists) {
- OneOffRead oor(_opCtx, ts);
- BSONObj ret;
- bool found = Helpers::findOne(
- _opCtx,
- AutoGetCollectionForRead(_opCtx, NamespaceString::kRsOplogNamespace).getCollection(),
- query,
- ret);
- ASSERT_EQ(found, exists) << "Found " << ret << " at " << ts.toBSON();
- ASSERT_EQ(!ret.isEmpty(), exists) << "Found " << ret << " at " << ts.toBSON();
- }
-
- void assertOldestActiveTxnTimestampEquals(const boost::optional<Timestamp>& ts,
- const Timestamp& atTs) {
- auto oldest = TransactionParticipant::getOldestActiveTimestamp(atTs);
- ASSERT_EQ(oldest, ts);
- }
-
- void assertHasStartOpTime() {
- auto txnDoc = _getTxnDoc();
- ASSERT_TRUE(txnDoc.hasField(SessionTxnRecord::kStartOpTimeFieldName));
- }
-
- void assertNoStartOpTime() {
- auto txnDoc = _getTxnDoc();
- ASSERT_FALSE(txnDoc.hasField(SessionTxnRecord::kStartOpTimeFieldName));
- }
-
- void setReplCoordAppliedOpTime(const repl::OpTime& opTime, Date_t wallTime = Date_t()) {
- repl::ReplicationCoordinator::get(_opCtx->getServiceContext())
- ->setMyLastAppliedOpTimeAndWallTime({opTime, wallTime});
- ASSERT_OK(repl::ReplicationCoordinator::get(_opCtx->getServiceContext())
- ->updateTerm(_opCtx, opTime.getTerm()));
- }
-
- /**
- * Asserts that the given collection is in (or not in) the DurableCatalog's list of idents at
- * the
- * provided timestamp.
- */
- void assertNamespaceInIdents(NamespaceString nss, Timestamp ts, bool shouldExpect) {
- OneOffRead oor(_opCtx, ts);
- auto durableCatalog = DurableCatalog::get(_opCtx);
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
-
- // getCollectionIdent() returns the ident for the given namespace in the DurableCatalog.
- // getAllIdents() actually looks in the RecordStore for a list of all idents, and is thus
- // versioned by timestamp. We can expect a namespace to have a consistent ident across
- // timestamps, provided the collection does not get renamed.
- auto expectedIdent =
- durableCatalog->getEntry(autoColl.getCollection()->getCatalogId()).ident;
- auto idents = durableCatalog->getAllIdents(_opCtx);
- auto found = std::find(idents.begin(), idents.end(), expectedIdent);
-
- if (shouldExpect) {
- ASSERT(found != idents.end()) << nss.ns() << " was not found at " << ts.toString();
- } else {
- ASSERT(found == idents.end()) << nss.ns() << " was found at " << ts.toString()
- << " when it should not have been.";
- }
- }
-
- /**
- * Use `ts` = Timestamp::min to observe all indexes.
- */
- std::string getNewIndexIdentAtTime(DurableCatalog* durableCatalog,
- std::vector<std::string>& origIdents,
- Timestamp ts) {
- auto ret = getNewIndexIdentsAtTime(durableCatalog, origIdents, ts);
- ASSERT_EQ(static_cast<std::size_t>(1), ret.size()) << " Num idents: " << ret.size();
- return ret[0];
- }
-
- /**
- * Use `ts` = Timestamp::min to observe all indexes.
- */
- std::vector<std::string> getNewIndexIdentsAtTime(DurableCatalog* durableCatalog,
- std::vector<std::string>& origIdents,
- Timestamp ts) {
- OneOffRead oor(_opCtx, ts);
-
- // Find the collection and index ident by performing a set difference on the original
- // idents and the current idents.
- std::vector<std::string> identsWithColl = durableCatalog->getAllIdents(_opCtx);
- std::sort(origIdents.begin(), origIdents.end());
- std::sort(identsWithColl.begin(), identsWithColl.end());
- std::vector<std::string> idxIdents;
- std::set_difference(identsWithColl.begin(),
- identsWithColl.end(),
- origIdents.begin(),
- origIdents.end(),
- std::back_inserter(idxIdents));
-
- for (const auto& ident : idxIdents) {
- ASSERT(ident.find("index-") == 0) << "Ident is not an index: " << ident;
- }
- return idxIdents;
- }
-
- std::string getDroppedIndexIdent(DurableCatalog* durableCatalog,
- std::vector<std::string>& origIdents) {
- // Find the collection and index ident by performing a set difference on the original
- // idents and the current idents.
- std::vector<std::string> identsWithColl = durableCatalog->getAllIdents(_opCtx);
- std::sort(origIdents.begin(), origIdents.end());
- std::sort(identsWithColl.begin(), identsWithColl.end());
- std::vector<std::string> collAndIdxIdents;
- std::set_difference(origIdents.begin(),
- origIdents.end(),
- identsWithColl.begin(),
- identsWithColl.end(),
- std::back_inserter(collAndIdxIdents));
-
- ASSERT(collAndIdxIdents.size() == 1) << "Num idents: " << collAndIdxIdents.size();
- return collAndIdxIdents[0];
- }
-
- std::vector<std::string> _getIdentDifference(DurableCatalog* durableCatalog,
- std::vector<std::string>& origIdents) {
- // Find the ident difference by performing a set difference on the original idents and the
- // current idents.
- std::vector<std::string> identsWithColl = durableCatalog->getAllIdents(_opCtx);
- std::sort(origIdents.begin(), origIdents.end());
- std::sort(identsWithColl.begin(), identsWithColl.end());
- std::vector<std::string> collAndIdxIdents;
- std::set_difference(identsWithColl.begin(),
- identsWithColl.end(),
- origIdents.begin(),
- origIdents.end(),
- std::back_inserter(collAndIdxIdents));
- return collAndIdxIdents;
- }
- std::tuple<std::string, std::string> getNewCollectionIndexIdent(
- DurableCatalog* durableCatalog, std::vector<std::string>& origIdents) {
- // Find the collection and index ident difference.
- auto collAndIdxIdents = _getIdentDifference(durableCatalog, origIdents);
-
- ASSERT(collAndIdxIdents.size() == 1 || collAndIdxIdents.size() == 2);
- if (collAndIdxIdents.size() == 1) {
- // `system.profile` collections do not have an `_id` index.
- return std::tie(collAndIdxIdents[0], "");
- }
- if (collAndIdxIdents.size() == 2) {
- // The idents are sorted, so the `collection-...` comes before `index-...`
- return std::tie(collAndIdxIdents[0], collAndIdxIdents[1]);
- }
-
- MONGO_UNREACHABLE;
- }
-
- /**
- * Note: expectedNewIndexIdents should include the _id index.
- */
- void assertRenamedCollectionIdentsAtTimestamp(DurableCatalog* durableCatalog,
- std::vector<std::string>& origIdents,
- size_t expectedNewIndexIdents,
- Timestamp timestamp) {
- OneOffRead oor(_opCtx, timestamp);
- // Find the collection and index ident difference.
- auto collAndIdxIdents = _getIdentDifference(durableCatalog, origIdents);
- size_t newNssIdents, newIdxIdents;
- newNssIdents = newIdxIdents = 0;
- for (const auto& ident : collAndIdxIdents) {
- ASSERT(ident.find("index-") == 0 || ident.find("collection-") == 0)
- << "Ident is not an index or collection: " << ident;
- if (ident.find("collection-") == 0) {
- ASSERT(++newNssIdents == 1) << "Expected new collection idents (1) differ from "
- "actual new collection idents ("
- << newNssIdents << ")";
- } else {
- newIdxIdents++;
- }
- }
- ASSERT(expectedNewIndexIdents == newIdxIdents)
- << "Expected new index idents (" << expectedNewIndexIdents
- << ") differ from actual new index idents (" << newIdxIdents << ")";
- }
-
- void assertIdentsExistAtTimestamp(DurableCatalog* durableCatalog,
- const std::string& collIdent,
- const std::string& indexIdent,
- Timestamp timestamp) {
- OneOffRead oor(_opCtx, timestamp);
-
- auto allIdents = durableCatalog->getAllIdents(_opCtx);
- if (collIdent.size() > 0) {
- // Index build test does not pass in a collection ident.
- ASSERT(std::find(allIdents.begin(), allIdents.end(), collIdent) != allIdents.end());
- }
-
- if (indexIdent.size() > 0) {
- // `system.profile` does not have an `_id` index.
- ASSERT(std::find(allIdents.begin(), allIdents.end(), indexIdent) != allIdents.end());
- }
- }
-
- void assertIdentsMissingAtTimestamp(DurableCatalog* durableCatalog,
- const std::string& collIdent,
- const std::string& indexIdent,
- Timestamp timestamp) {
- OneOffRead oor(_opCtx, timestamp);
- auto allIdents = durableCatalog->getAllIdents(_opCtx);
- if (collIdent.size() > 0) {
- // Index build test does not pass in a collection ident.
- ASSERT(std::find(allIdents.begin(), allIdents.end(), collIdent) == allIdents.end());
- }
-
- ASSERT(std::find(allIdents.begin(), allIdents.end(), indexIdent) == allIdents.end())
- << "Ident: " << indexIdent << " Timestamp: " << timestamp;
- }
-
- std::string dumpMultikeyPaths(const MultikeyPaths& multikeyPaths) {
- std::stringstream ss;
-
- ss << "[ ";
- for (const auto& multikeyComponents : multikeyPaths) {
- ss << "[ ";
- for (const auto& multikeyComponent : multikeyComponents) {
- ss << multikeyComponent << " ";
- }
- ss << "] ";
- }
- ss << "]";
-
- return ss.str();
- }
-
- void assertMultikeyPaths(OperationContext* opCtx,
- const CollectionPtr& collection,
- StringData indexName,
- Timestamp ts,
- bool shouldBeMultikey,
- const MultikeyPaths& expectedMultikeyPaths) {
- DurableCatalog* durableCatalog = DurableCatalog::get(opCtx);
-
- OneOffRead oor(_opCtx, ts);
-
- MultikeyPaths actualMultikeyPaths;
- if (!shouldBeMultikey) {
- ASSERT_FALSE(durableCatalog->isIndexMultikey(
- opCtx, collection->getCatalogId(), indexName, &actualMultikeyPaths))
- << "index " << indexName << " should not be multikey at timestamp " << ts;
- } else {
- ASSERT(durableCatalog->isIndexMultikey(
- opCtx, collection->getCatalogId(), indexName, &actualMultikeyPaths))
- << "index " << indexName << " should be multikey at timestamp " << ts;
- }
-
- const bool match = (expectedMultikeyPaths == actualMultikeyPaths);
- if (!match) {
- FAIL(str::stream() << "TS: " << ts.toString()
- << ", Expected: " << dumpMultikeyPaths(expectedMultikeyPaths)
- << ", Actual: " << dumpMultikeyPaths(actualMultikeyPaths));
- }
- ASSERT_TRUE(match);
- }
-
-private:
- BSONObj _getTxnDoc() {
- auto txnParticipant = TransactionParticipant::get(_opCtx);
- auto txnsFilter =
- BSON("_id" << _opCtx->getLogicalSessionId()->toBSON() << "txnNum"
- << txnParticipant.getActiveTxnNumberAndRetryCounter().getTxnNumber());
- return queryCollection(NamespaceString::kSessionTransactionsTableNamespace, txnsFilter);
- }
-};
-
-class SecondaryInsertTimes : public StorageTimestampTest {
-public:
- void run() {
- // In order for applyOps to assign timestamps, we must be in non-replicated mode.
- repl::UnreplicatedWritesBlock uwb(_opCtx);
-
- // Create a new collection.
- NamespaceString nss("unittests.timestampedUpdates");
- reset(nss);
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
-
- const std::int32_t docsToInsert = 10;
- const LogicalTime firstInsertTime = _clock->tickClusterTime(docsToInsert);
- for (std::int32_t idx = 0; idx < docsToInsert; ++idx) {
- BSONObjBuilder result;
- ASSERT_OK(applyOps(
- _opCtx,
- nss.db().toString(),
- BSON("applyOps" << BSON_ARRAY(
- BSON("ts" << firstInsertTime.addTicks(idx).asTimestamp() << "t" << 1LL
- << "v" << 2 << "op"
- << "i"
- << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid()
- << "wall" << Date_t() << "o" << BSON("_id" << idx))
- << BSON("ts" << firstInsertTime.addTicks(idx).asTimestamp() << "t" << 1LL
- << "op"
- << "c"
- << "ns"
- << "test.$cmd"
- << "wall" << Date_t() << "o"
- << BSON("applyOps" << BSONArrayBuilder().arr())))),
- repl::OplogApplication::Mode::kApplyOpsCmd,
- &result));
- }
-
- for (std::int32_t idx = 0; idx < docsToInsert; ++idx) {
- OneOffRead oor(_opCtx, firstInsertTime.addTicks(idx).asTimestamp());
-
- BSONObj result;
- ASSERT(Helpers::getLast(_opCtx, nss.ns().c_str(), result)) << " idx is " << idx;
- ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(result, BSON("_id" << idx)))
- << "Doc: " << result.toString() << " Expected: " << BSON("_id" << idx);
- }
- }
-};
-
-class SecondaryArrayInsertTimes : public StorageTimestampTest {
-public:
- void run() {
- // In order for oplog application to assign timestamps, we must be in non-replicated mode
- // and disable document validation.
- repl::UnreplicatedWritesBlock uwb(_opCtx);
- DisableDocumentValidation validationDisabler(_opCtx);
-
- // Create a new collection.
- NamespaceString nss("unittests.timestampedUpdates");
- reset(nss);
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
-
- const std::int32_t docsToInsert = 10;
- const LogicalTime firstInsertTime = _clock->tickClusterTime(docsToInsert);
-
- BSONObjBuilder oplogCommonBuilder;
- oplogCommonBuilder << "v" << 2 << "op"
- << "i"
- << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid() << "wall"
- << Date_t();
- auto oplogCommon = oplogCommonBuilder.done();
-
- std::vector<repl::OplogEntry> oplogEntries;
- oplogEntries.reserve(docsToInsert);
- std::vector<const repl::OplogEntry*> opPtrs;
- BSONObjBuilder oplogEntryBuilders[docsToInsert];
- for (std::int32_t idx = 0; idx < docsToInsert; ++idx) {
- auto o = BSON("_id" << idx);
- // Populate the "ts" field.
- oplogEntryBuilders[idx] << "ts" << firstInsertTime.addTicks(idx).asTimestamp();
- // Populate the "t" (term) field.
- oplogEntryBuilders[idx] << "t" << 1LL;
- // Populate the "o" field.
- oplogEntryBuilders[idx] << "o" << o;
- // Populate the "wall" field
- oplogEntryBuilders[idx] << "wall" << Date_t();
- // Populate the other common fields.
- oplogEntryBuilders[idx].appendElementsUnique(oplogCommon);
- // Insert ops to be applied.
- oplogEntries.push_back(repl::OplogEntry(oplogEntryBuilders[idx].done()));
- opPtrs.push_back(&(oplogEntries.back()));
- }
-
- repl::OplogEntryOrGroupedInserts groupedInserts(opPtrs.cbegin(), opPtrs.cend());
- const bool dataIsConsistent = true;
- ASSERT_OK(repl::applyOplogEntryOrGroupedInserts(
- _opCtx, groupedInserts, repl::OplogApplication::Mode::kSecondary, dataIsConsistent));
-
- for (std::int32_t idx = 0; idx < docsToInsert; ++idx) {
- OneOffRead oor(_opCtx, firstInsertTime.addTicks(idx).asTimestamp());
-
- BSONObj result;
- ASSERT(Helpers::getLast(_opCtx, nss.ns().c_str(), result)) << " idx is " << idx;
- ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(result, BSON("_id" << idx)))
- << "Doc: " << result.toString() << " Expected: " << BSON("_id" << idx);
- }
- }
-};
-
-class SecondaryDeleteTimes : public StorageTimestampTest {
-public:
- void run() {
- // In order for applyOps to assign timestamps, we must be in non-replicated mode.
- repl::UnreplicatedWritesBlock uwb(_opCtx);
-
- // Create a new collection.
- NamespaceString nss("unittests.timestampedDeletes");
- reset(nss);
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
-
- // Insert some documents.
- const std::int32_t docsToInsert = 10;
- const LogicalTime firstInsertTime = _clock->tickClusterTime(docsToInsert);
- const LogicalTime lastInsertTime = firstInsertTime.addTicks(docsToInsert - 1);
- WriteUnitOfWork wunit(_opCtx);
- for (std::int32_t num = 0; num < docsToInsert; ++num) {
- insertDocument(autoColl.getCollection(),
- InsertStatement(BSON("_id" << num << "a" << num),
- firstInsertTime.addTicks(num).asTimestamp(),
- 0LL));
- }
- wunit.commit();
- ASSERT_EQ(docsToInsert, itCount(autoColl.getCollection()));
-
- // Delete all documents one at a time.
- const LogicalTime startDeleteTime = _clock->tickClusterTime(docsToInsert);
- for (std::int32_t num = 0; num < docsToInsert; ++num) {
- ASSERT_OK(doNonAtomicApplyOps(
- nss.db().toString(),
- {BSON("ts" << startDeleteTime.addTicks(num).asTimestamp() << "t" << 0LL
- << "v" << 2 << "op"
- << "d"
- << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid()
- << "wall" << Date_t() << "o" << BSON("_id" << num))})
- .getStatus());
- }
-
- for (std::int32_t num = 0; num <= docsToInsert; ++num) {
- // The first loop queries at `lastInsertTime` and should count all documents. Querying
- // at each successive tick counts one less document.
- OneOffRead oor(_opCtx, lastInsertTime.addTicks(num).asTimestamp());
- ASSERT_EQ(docsToInsert - num, itCount(autoColl.getCollection()));
- }
- }
-};
-
-class SecondaryUpdateTimes : public StorageTimestampTest {
-public:
- void run() {
- // In order for applyOps to assign timestamps, we must be in non-replicated mode.
- repl::UnreplicatedWritesBlock uwb(_opCtx);
-
- // Create a new collection.
- NamespaceString nss("unittests.timestampedUpdates");
- reset(nss);
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
-
- // Insert one document that will go through a series of updates.
- const LogicalTime insertTime = _clock->tickClusterTime(1);
- WriteUnitOfWork wunit(_opCtx);
- insertDocument(autoColl.getCollection(),
- InsertStatement(BSON("_id" << 0), insertTime.asTimestamp(), 0LL));
- wunit.commit();
- ASSERT_EQ(1, itCount(autoColl.getCollection()));
-
- // Each pair in the vector represents the update to perform at the next tick of the
- // clock. `pair.first` is the update to perform and `pair.second` is the full value of the
- // document after the transformation.
- const std::vector<std::pair<BSONObj, BSONObj>> updates = {
- {BSON("$set" << BSON("val" << 1)), BSON("_id" << 0 << "val" << 1)},
- {BSON("$unset" << BSON("val" << 1)), BSON("_id" << 0)},
- {BSON("$addToSet" << BSON("theSet" << 1)),
- BSON("_id" << 0 << "theSet" << BSON_ARRAY(1))},
- {BSON("$addToSet" << BSON("theSet" << 2)),
- BSON("_id" << 0 << "theSet" << BSON_ARRAY(1 << 2))},
- {BSON("$pull" << BSON("theSet" << 1)), BSON("_id" << 0 << "theSet" << BSON_ARRAY(2))},
- {BSON("$pull" << BSON("theSet" << 2)), BSON("_id" << 0 << "theSet" << BSONArray())},
- {BSON("$set" << BSON("theMap.val" << 1)),
- BSON("_id" << 0 << "theSet" << BSONArray() << "theMap" << BSON("val" << 1))},
- {BSON("$rename" << BSON("theSet"
- << "theOtherSet")),
- BSON("_id" << 0 << "theMap" << BSON("val" << 1) << "theOtherSet" << BSONArray())}};
-
- const LogicalTime firstUpdateTime = _clock->tickClusterTime(updates.size());
- for (std::size_t idx = 0; idx < updates.size(); ++idx) {
- ASSERT_OK(doNonAtomicApplyOps(
- nss.db().toString(),
- {BSON("ts" << firstUpdateTime.addTicks(idx).asTimestamp() << "t" << 0LL
- << "v" << 2 << "op"
- << "u"
- << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid()
- << "wall" << Date_t() << "o2" << BSON("_id" << 0) << "o"
- << updates[idx].first)})
- .getStatus());
- }
-
- for (std::size_t idx = 0; idx < updates.size(); ++idx) {
- // Querying at each successive ticks after `insertTime` sees the document transform in
- // the series.
- OneOffRead oor(_opCtx, insertTime.addTicks(idx + 1).asTimestamp());
-
- auto doc = findOne(autoColl.getCollection());
- ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(doc, updates[idx].second))
- << "Doc: " << doc.toString() << " Expected: " << updates[idx].second.toString();
- }
- }
-};
-
-class SecondaryInsertToUpsert : public StorageTimestampTest {
-public:
- void run() {
- // In order for applyOps to assign timestamps, we must be in non-replicated mode.
- repl::UnreplicatedWritesBlock uwb(_opCtx);
-
- // Create a new collection.
- NamespaceString nss("unittests.insertToUpsert");
- reset(nss);
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
-
- const LogicalTime insertTime = _clock->tickClusterTime(2);
-
- // This applyOps runs into an insert of `{_id: 0, field: 0}` followed by a second insert
- // on the same collection with `{_id: 0}`. It's expected for this second insert to be
- // turned into an upsert. The goal document does not contain `field: 0`.
- BSONObjBuilder resultBuilder;
- auto result = unittest::assertGet(doNonAtomicApplyOps(
- nss.db().toString(),
- {BSON("ts" << insertTime.asTimestamp() << "t" << 1LL << "op"
- << "i"
- << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid() << "wall"
- << Date_t() << "o" << BSON("_id" << 0 << "field" << 0)),
- BSON("ts" << insertTime.addTicks(1).asTimestamp() << "t" << 1LL << "op"
- << "i"
- << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid() << "wall"
- << Date_t() << "o" << BSON("_id" << 0))}));
-
- ASSERT_EQ(2, result.getIntField("applied"));
- ASSERT(result["results"].Array()[0].Bool());
- ASSERT(result["results"].Array()[1].Bool());
-
- // Reading at `insertTime` should show the original document, `{_id: 0, field: 0}`.
- auto recoveryUnit = _opCtx->recoveryUnit();
- recoveryUnit->abandonSnapshot();
- recoveryUnit->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
- insertTime.asTimestamp());
- auto doc = findOne(autoColl.getCollection());
- ASSERT_EQ(0,
- SimpleBSONObjComparator::kInstance.compare(doc, BSON("_id" << 0 << "field" << 0)))
- << "Doc: " << doc.toString() << " Expected: {_id: 0, field: 0}";
-
- // Reading at `insertTime + 1` should show the second insert that got converted to an
- // upsert, `{_id: 0}`.
- recoveryUnit->abandonSnapshot();
- recoveryUnit->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
- insertTime.addTicks(1).asTimestamp());
- doc = findOne(autoColl.getCollection());
- ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(doc, BSON("_id" << 0)))
- << "Doc: " << doc.toString() << " Expected: {_id: 0}";
- }
-};
-
-class SecondaryAtomicApplyOps : public StorageTimestampTest {
-public:
- void run() {
- // Create a new collection.
- NamespaceString nss("unittests.insertToUpsert");
- reset(nss);
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
-
- // Reserve a timestamp before the inserts should happen.
- const LogicalTime preInsertTimestamp = _clock->tickClusterTime(1);
- auto swResult =
- doAtomicApplyOps(nss.db().toString(),
- {BSON("op"
- << "i"
- << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid()
- << "o" << BSON("_id" << 0)),
- BSON("op"
- << "i"
- << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid()
- << "o" << BSON("_id" << 1))});
- ASSERT_OK(swResult);
-
- ASSERT_EQ(2, swResult.getValue().getIntField("applied"));
- ASSERT(swResult.getValue()["results"].Array()[0].Bool());
- ASSERT(swResult.getValue()["results"].Array()[1].Bool());
-
- // Reading at `preInsertTimestamp` should not find anything.
- auto recoveryUnit = _opCtx->recoveryUnit();
- recoveryUnit->abandonSnapshot();
- recoveryUnit->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
- preInsertTimestamp.asTimestamp());
- ASSERT_EQ(0, itCount(autoColl.getCollection()))
- << "Should not observe a write at `preInsertTimestamp`. TS: "
- << preInsertTimestamp.asTimestamp();
-
- // Reading at `preInsertTimestamp + 1` should observe both inserts.
- recoveryUnit = _opCtx->recoveryUnit();
- recoveryUnit->abandonSnapshot();
- recoveryUnit->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
- preInsertTimestamp.addTicks(1).asTimestamp());
- ASSERT_EQ(2, itCount(autoColl.getCollection()))
- << "Should observe both writes at `preInsertTimestamp + 1`. TS: "
- << preInsertTimestamp.addTicks(1).asTimestamp();
- }
-};
-
-
-// This should have the same result as `SecondaryInsertToUpsert` except it gets there a different
-// way. Doing an atomic `applyOps` should result in a WriteConflictException because the same
-// transaction is trying to write modify the same document twice. The `applyOps` command should
-// catch that failure and retry in non-atomic mode, preserving the timestamps supplied by the
-// user.
-class SecondaryAtomicApplyOpsWCEToNonAtomic : public StorageTimestampTest {
-public:
- void run() {
- // Create a new collectiont.
- NamespaceString nss("unitteTsts.insertToUpsert");
- reset(nss);
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
-
- const LogicalTime preInsertTimestamp = _clock->tickClusterTime(1);
- auto swResult =
- doAtomicApplyOps(nss.db().toString(),
- {BSON("op"
- << "i"
- << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid()
- << "o" << BSON("_id" << 0 << "field" << 0)),
- BSON("op"
- << "i"
- << "ns" << nss.ns() << "ui" << autoColl.getCollection()->uuid()
- << "o" << BSON("_id" << 0))});
- ASSERT_OK(swResult);
-
- ASSERT_EQ(2, swResult.getValue().getIntField("applied"));
- ASSERT(swResult.getValue()["results"].Array()[0].Bool());
- ASSERT(swResult.getValue()["results"].Array()[1].Bool());
-
- // Reading at `insertTime` should not see any documents.
- auto recoveryUnit = _opCtx->recoveryUnit();
- recoveryUnit->abandonSnapshot();
- recoveryUnit->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
- preInsertTimestamp.asTimestamp());
- ASSERT_EQ(0, itCount(autoColl.getCollection()))
- << "Should not find any documents at `preInsertTimestamp`. TS: "
- << preInsertTimestamp.asTimestamp();
-
- // Reading at `preInsertTimestamp + 1` should show the final state of the document.
- recoveryUnit->abandonSnapshot();
- recoveryUnit->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
- preInsertTimestamp.addTicks(1).asTimestamp());
- auto doc = findOne(autoColl.getCollection());
- ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(doc, BSON("_id" << 0)))
- << "Doc: " << doc.toString() << " Expected: {_id: 0}";
- }
-};
-
-class SecondaryCreateCollection : public StorageTimestampTest {
-public:
- void run() {
- // In order for applyOps to assign timestamps, we must be in non-replicated mode.
- repl::UnreplicatedWritesBlock uwb(_opCtx);
-
- NamespaceString nss("unittests.secondaryCreateCollection");
- ASSERT_OK(repl::StorageInterface::get(_opCtx)->dropCollection(_opCtx, nss));
-
- { ASSERT_FALSE(AutoGetCollectionForReadCommand(_opCtx, nss).getCollection()); }
-
- BSONObjBuilder resultBuilder;
- auto swResult = doNonAtomicApplyOps(
- nss.db().toString(),
- {
- BSON("ts" << presentTs << "t" << 1LL << "op"
- << "c"
- << "ui" << UUID::gen() << "ns" << nss.getCommandNS().ns() << "wall"
- << Date_t() << "o" << BSON("create" << nss.coll())),
- });
- ASSERT_OK(swResult);
-
- { ASSERT(AutoGetCollectionForReadCommand(_opCtx, nss).getCollection()); }
-
- assertNamespaceInIdents(nss, pastTs, false);
- assertNamespaceInIdents(nss, presentTs, true);
- assertNamespaceInIdents(nss, futureTs, true);
- assertNamespaceInIdents(nss, nullTs, true);
- }
-};
-
-class SecondaryCreateTwoCollections : public StorageTimestampTest {
-public:
- void run() {
- // In order for applyOps to assign timestamps, we must be in non-replicated mode.
- repl::UnreplicatedWritesBlock uwb(_opCtx);
-
- std::string dbName = "unittest";
- NamespaceString nss1(dbName, "secondaryCreateTwoCollections1");
- NamespaceString nss2(dbName, "secondaryCreateTwoCollections2");
- ASSERT_OK(repl::StorageInterface::get(_opCtx)->dropCollection(_opCtx, nss1));
- ASSERT_OK(repl::StorageInterface::get(_opCtx)->dropCollection(_opCtx, nss2));
-
- { ASSERT_FALSE(AutoGetCollectionForReadCommand(_opCtx, nss1).getCollection()); }
- { ASSERT_FALSE(AutoGetCollectionForReadCommand(_opCtx, nss2).getCollection()); }
-
- const LogicalTime dummyLt = futureLt.addTicks(1);
- const Timestamp dummyTs = dummyLt.asTimestamp();
-
- BSONObjBuilder resultBuilder;
- auto swResult = doNonAtomicApplyOps(
- dbName,
- {
- BSON("ts" << presentTs << "t" << 1LL << "op"
- << "c"
- << "ui" << UUID::gen() << "ns" << nss1.getCommandNS().ns() << "wall"
- << Date_t() << "o" << BSON("create" << nss1.coll())),
- BSON("ts" << futureTs << "t" << 1LL << "op"
- << "c"
- << "ui" << UUID::gen() << "ns" << nss2.getCommandNS().ns() << "wall"
- << Date_t() << "o" << BSON("create" << nss2.coll())),
- });
- ASSERT_OK(swResult);
-
- { ASSERT(AutoGetCollectionForReadCommand(_opCtx, nss1).getCollection()); }
- { ASSERT(AutoGetCollectionForReadCommand(_opCtx, nss2).getCollection()); }
-
- assertNamespaceInIdents(nss1, pastTs, false);
- assertNamespaceInIdents(nss1, presentTs, true);
- assertNamespaceInIdents(nss1, futureTs, true);
- assertNamespaceInIdents(nss1, dummyTs, true);
- assertNamespaceInIdents(nss1, nullTs, true);
-
- assertNamespaceInIdents(nss2, pastTs, false);
- assertNamespaceInIdents(nss2, presentTs, false);
- assertNamespaceInIdents(nss2, futureTs, true);
- assertNamespaceInIdents(nss2, dummyTs, true);
- assertNamespaceInIdents(nss2, nullTs, true);
- }
-};
-
-class SecondaryCreateCollectionBetweenInserts : public StorageTimestampTest {
-public:
- void run() {
- // In order for applyOps to assign timestamps, we must be in non-replicated mode.
- repl::UnreplicatedWritesBlock uwb(_opCtx);
-
- std::string dbName = "unittest";
- NamespaceString nss1(dbName, "secondaryCreateCollectionBetweenInserts1");
- NamespaceString nss2(dbName, "secondaryCreateCollectionBetweenInserts2");
- BSONObj doc1 = BSON("_id" << 1 << "field" << 1);
- BSONObj doc2 = BSON("_id" << 2 << "field" << 2);
-
- const UUID uuid2 = UUID::gen();
-
- const LogicalTime insert2Lt = futureLt.addTicks(1);
- const Timestamp insert2Ts = insert2Lt.asTimestamp();
-
- const LogicalTime dummyLt = insert2Lt.addTicks(1);
- const Timestamp dummyTs = dummyLt.asTimestamp();
-
- {
- reset(nss1);
- AutoGetCollection autoColl(_opCtx, nss1, LockMode::MODE_IX);
-
- ASSERT_OK(repl::StorageInterface::get(_opCtx)->dropCollection(_opCtx, nss2));
- { ASSERT_FALSE(AutoGetCollectionForReadCommand(_opCtx, nss2).getCollection()); }
-
- BSONObjBuilder resultBuilder;
- auto swResult = doNonAtomicApplyOps(
- dbName,
- {
- BSON("ts" << presentTs << "t" << 1LL << "op"
- << "i"
- << "ns" << nss1.ns() << "ui" << autoColl.getCollection()->uuid()
- << "wall" << Date_t() << "o" << doc1),
- BSON("ts" << futureTs << "t" << 1LL << "op"
- << "c"
- << "ui" << uuid2 << "ns" << nss2.getCommandNS().ns() << "wall"
- << Date_t() << "o" << BSON("create" << nss2.coll())),
- BSON("ts" << insert2Ts << "t" << 1LL << "op"
- << "i"
- << "ns" << nss2.ns() << "ui" << uuid2 << "wall" << Date_t() << "o"
- << doc2),
- });
- ASSERT_OK(swResult);
- }
-
- {
- AutoGetCollectionForReadCommand autoColl1(_opCtx, nss1);
- const auto& coll1 = autoColl1.getCollection();
- ASSERT(coll1);
- AutoGetCollectionForReadCommand autoColl2(_opCtx, nss2);
- const auto& coll2 = autoColl2.getCollection();
- ASSERT(coll2);
-
- assertDocumentAtTimestamp(coll1, pastTs, BSONObj());
- assertDocumentAtTimestamp(coll1, presentTs, doc1);
- assertDocumentAtTimestamp(coll1, futureTs, doc1);
- assertDocumentAtTimestamp(coll1, insert2Ts, doc1);
- assertDocumentAtTimestamp(coll1, dummyTs, doc1);
- assertDocumentAtTimestamp(coll1, nullTs, doc1);
-
- assertNamespaceInIdents(nss2, pastTs, false);
- assertNamespaceInIdents(nss2, presentTs, false);
- assertNamespaceInIdents(nss2, futureTs, true);
- assertNamespaceInIdents(nss2, insert2Ts, true);
- assertNamespaceInIdents(nss2, dummyTs, true);
- assertNamespaceInIdents(nss2, nullTs, true);
-
- assertDocumentAtTimestamp(coll2, pastTs, BSONObj());
- assertDocumentAtTimestamp(coll2, presentTs, BSONObj());
- assertDocumentAtTimestamp(coll2, futureTs, BSONObj());
- assertDocumentAtTimestamp(coll2, insert2Ts, doc2);
- assertDocumentAtTimestamp(coll2, dummyTs, doc2);
- assertDocumentAtTimestamp(coll2, nullTs, doc2);
- }
- }
-};
-
-class PrimaryCreateCollectionInApplyOps : public StorageTimestampTest {
-public:
- void run() {
- NamespaceString nss("unittests.primaryCreateCollectionInApplyOps");
- ASSERT_OK(repl::StorageInterface::get(_opCtx)->dropCollection(_opCtx, nss));
-
- { ASSERT_FALSE(AutoGetCollectionForReadCommand(_opCtx, nss).getCollection()); }
-
- BSONObjBuilder resultBuilder;
- auto swResult = doNonAtomicApplyOps(
- nss.db().toString(),
- {
- BSON("ts" << presentTs << "t" << 1LL << "op"
- << "c"
- << "ui" << UUID::gen() << "ns" << nss.getCommandNS().ns() << "wall"
- << Date_t() << "o" << BSON("create" << nss.coll())),
- });
- ASSERT_OK(swResult);
-
- { ASSERT(AutoGetCollectionForReadCommand(_opCtx, nss).getCollection()); }
-
- BSONObj result;
- ASSERT(Helpers::getLast(
- _opCtx, NamespaceString::kRsOplogNamespace.toString().c_str(), result));
- repl::OplogEntry op(result);
- ASSERT(op.getOpType() == repl::OpTypeEnum::kCommand) << op.toBSONForLogging();
- // The next logOp() call will get 'futureTs', which will be the timestamp at which we do
- // the write. Thus we expect the write to appear at 'futureTs' and not before.
- ASSERT_EQ(op.getTimestamp(), futureTs) << op.toBSONForLogging();
- ASSERT_EQ(op.getNss().ns(), nss.getCommandNS().ns()) << op.toBSONForLogging();
- ASSERT_BSONOBJ_EQ(op.getObject(), BSON("create" << nss.coll()));
-
- assertNamespaceInIdents(nss, pastTs, false);
- assertNamespaceInIdents(nss, presentTs, false);
- assertNamespaceInIdents(nss, futureTs, true);
- assertNamespaceInIdents(nss, nullTs, true);
- }
-};
-
-class SecondarySetIndexMultikeyOnInsert : public StorageTimestampTest {
-
-public:
- void run() {
- // Pretend to be a secondary.
- repl::UnreplicatedWritesBlock uwb(_opCtx);
-
- NamespaceString nss("unittests.SecondarySetIndexMultikeyOnInsert");
- reset(nss);
- UUID uuid = UUID::gen();
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- uuid = autoColl.getCollection()->uuid();
- }
- auto indexName = "a_1";
- auto indexSpec = BSON("name" << indexName << "key" << BSON("a" << 1) << "v"
- << static_cast<int>(kIndexVersion));
- ASSERT_OK(dbtests::createIndexFromSpec(_opCtx, nss.ns(), indexSpec));
-
- _coordinatorMock->alwaysAllowWrites(false);
-
- const LogicalTime pastTime = _clock->tickClusterTime(1);
- const LogicalTime insertTime0 = _clock->tickClusterTime(1);
- const LogicalTime insertTime1 = _clock->tickClusterTime(1);
- const LogicalTime insertTime2 = _clock->tickClusterTime(1);
-
- BSONObj doc0 = BSON("_id" << 0 << "a" << 3);
- BSONObj doc1 = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2));
- BSONObj doc2 = BSON("_id" << 2 << "a" << BSON_ARRAY(1 << 2));
- auto op0 = repl::OplogEntry(
- BSON("ts" << insertTime0.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
- << "i"
- << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc0));
- auto op1 = repl::OplogEntry(
- BSON("ts" << insertTime1.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
- << "i"
- << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc1));
- auto op2 = repl::OplogEntry(
- BSON("ts" << insertTime2.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
- << "i"
- << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc2));
- std::vector<repl::OplogEntry> ops = {op0, op1, op2};
-
- DoNothingOplogApplierObserver observer;
- auto storageInterface = repl::StorageInterface::get(_opCtx);
- auto writerPool = repl::makeReplWriterPool();
- repl::OplogApplierImpl oplogApplier(
- nullptr, // task executor. not required for applyOplogBatch().
- nullptr, // oplog buffer. not required for applyOplogBatch().
- &observer,
- _coordinatorMock,
- _consistencyMarkers,
- storageInterface,
- repl::OplogApplier::Options(repl::OplogApplication::Mode::kSecondary),
- writerPool.get());
- ASSERT_EQUALS(op2.getOpTime(),
- unittest::assertGet(oplogApplier.applyOplogBatch(_opCtx, ops)));
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- assertMultikeyPaths(
- _opCtx, autoColl.getCollection(), indexName, pastTime.asTimestamp(), false, {{}});
- assertMultikeyPaths(
- _opCtx, autoColl.getCollection(), indexName, insertTime0.asTimestamp(), true, {{0}});
- assertMultikeyPaths(
- _opCtx, autoColl.getCollection(), indexName, insertTime1.asTimestamp(), true, {{0}});
- assertMultikeyPaths(
- _opCtx, autoColl.getCollection(), indexName, insertTime2.asTimestamp(), true, {{0}});
- }
-};
-
-class SecondarySetWildcardIndexMultikeyOnInsert : public StorageTimestampTest {
-
-public:
- void run() {
- // Pretend to be a secondary.
- repl::UnreplicatedWritesBlock uwb(_opCtx);
-
- NamespaceString nss("unittests.SecondarySetWildcardIndexMultikeyOnInsert");
- // Use a capped collection to prevent the batch applier from grouping insert operations
- // together in the same WUOW. This test attempts to apply operations out of order, but the
- // storage engine does not allow an operation to set out-of-order timestamps in the same
- // WUOW.
- ASSERT_OK(createCollection(
- _opCtx,
- nss.db().toString(),
- BSON("create" << nss.coll() << "capped" << true << "size" << 1 * 1024 * 1024)));
- auto uuid = [&]() {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- return autoColl.getCollection()->uuid();
- }();
-
- auto indexName = "a_1";
- auto indexSpec = BSON("name" << indexName << "key" << BSON("$**" << 1) << "v"
- << static_cast<int>(kIndexVersion));
- ASSERT_OK(dbtests::createIndexFromSpec(_opCtx, nss.ns(), indexSpec));
-
- _coordinatorMock->alwaysAllowWrites(false);
-
- const LogicalTime insertTime0 = _clock->tickClusterTime(1);
- const LogicalTime insertTime1 = _clock->tickClusterTime(1);
- const LogicalTime insertTime2 = _clock->tickClusterTime(1);
-
- BSONObj doc0 = BSON("_id" << 0 << "a" << 3);
- BSONObj doc1 = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2));
- BSONObj doc2 = BSON("_id" << 2 << "a" << BSON_ARRAY(1 << 2));
- auto op0 = repl::OplogEntry(
- BSON("ts" << insertTime0.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
- << "i"
- << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc0));
- auto op1 = repl::OplogEntry(
- BSON("ts" << insertTime1.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
- << "i"
- << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc1));
- auto op2 = repl::OplogEntry(
- BSON("ts" << insertTime2.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
- << "i"
- << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc2));
-
- // Coerce oplog application to apply op2 before op1. This does not guarantee the actual
- // order of application however, because the oplog applier applies these operations in
- // parallel across several threads. The test accepts the possibility of a false negative
- // (test passes when it should fail) in favor of occasionally finding a true positive (test
- // fails as intended).
- std::vector<repl::OplogEntry> ops = {op0, op2, op1};
-
- DoNothingOplogApplierObserver observer;
- auto storageInterface = repl::StorageInterface::get(_opCtx);
- auto writerPool = repl::makeReplWriterPool();
- repl::OplogApplierImpl oplogApplier(
- nullptr, // task executor. not required for applyOplogBatch().
- nullptr, // oplog buffer. not required for applyOplogBatch().
- &observer,
- _coordinatorMock,
- _consistencyMarkers,
- storageInterface,
- repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering),
- writerPool.get());
-
- uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops));
-
- AutoGetCollectionForRead autoColl(_opCtx, nss);
- auto wildcardIndexDescriptor =
- autoColl.getCollection()->getIndexCatalog()->findIndexByName(_opCtx, indexName);
- const IndexAccessMethod* wildcardIndexAccessMethod = autoColl.getCollection()
- ->getIndexCatalog()
- ->getEntry(wildcardIndexDescriptor)
- ->accessMethod();
- {
- // Verify that, even though op2 was applied first, the multikey state is observed in all
- // WiredTiger transactions that can contain the data written by op1.
- OneOffRead oor(_opCtx, insertTime1.asTimestamp());
- const WildcardAccessMethod* wam =
- dynamic_cast<const WildcardAccessMethod*>(wildcardIndexAccessMethod);
- MultikeyMetadataAccessStats stats;
- std::set<FieldRef> paths = getWildcardMultikeyPathSet(wam, _opCtx, &stats);
- ASSERT_EQUALS(1, paths.size());
- ASSERT_EQUALS("a", paths.begin()->dottedField());
- }
- {
- // Oplog application conservatively uses the first optime in the batch, insertTime0, as
- // the point at which the index became multikey, despite the fact that the earliest op
- // which caused the index to become multikey did not occur until insertTime1. This works
- // because if we construct a query plan that incorrectly believes a particular path to
- // be multikey, the plan will still be correct (if possibly sub-optimal). Conversely, if
- // we were to construct a query plan that incorrectly believes a path is NOT multikey,
- // it could produce incorrect results.
- OneOffRead oor(_opCtx, insertTime0.asTimestamp());
- const WildcardAccessMethod* wam =
- dynamic_cast<const WildcardAccessMethod*>(wildcardIndexAccessMethod);
- MultikeyMetadataAccessStats stats;
- std::set<FieldRef> paths = getWildcardMultikeyPathSet(wam, _opCtx, &stats);
- ASSERT_EQUALS(1, paths.size());
- ASSERT_EQUALS("a", paths.begin()->dottedField());
- }
- }
-};
-
-class SecondarySetWildcardIndexMultikeyOnUpdate : public StorageTimestampTest {
-
-public:
- void run() {
- // Pretend to be a secondary.
- repl::UnreplicatedWritesBlock uwb(_opCtx);
-
- NamespaceString nss("unittests.SecondarySetWildcardIndexMultikeyOnUpdate");
- reset(nss);
- UUID uuid = UUID::gen();
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- uuid = autoColl.getCollection()->uuid();
- }
- auto indexName = "a_1";
- auto indexSpec = BSON("name" << indexName << "key" << BSON("$**" << 1) << "v"
- << static_cast<int>(kIndexVersion));
- ASSERT_OK(dbtests::createIndexFromSpec(_opCtx, nss.ns(), indexSpec));
-
- _coordinatorMock->alwaysAllowWrites(false);
-
- const LogicalTime insertTime0 = _clock->tickClusterTime(1);
- const LogicalTime updateTime1 = _clock->tickClusterTime(1);
- const LogicalTime updateTime2 = _clock->tickClusterTime(1);
-
- BSONObj doc0 = BSON("_id" << 0 << "a" << 3);
- BSONObj doc1 = BSON("$v" << 1 << "$set" << BSON("a" << BSON_ARRAY(1 << 2)));
- BSONObj doc2 = BSON("$v" << 1 << "$set" << BSON("a" << BSON_ARRAY(1 << 2)));
- auto op0 = repl::OplogEntry(
- BSON("ts" << insertTime0.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
- << "i"
- << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc0));
- auto op1 = repl::OplogEntry(
- BSON("ts" << updateTime1.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
- << "u"
- << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc1
- << "o2" << BSON("_id" << 0)));
- auto op2 = repl::OplogEntry(
- BSON("ts" << updateTime2.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
- << "u"
- << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc2
- << "o2" << BSON("_id" << 0)));
-
- // Coerce oplog application to apply op2 before op1. This does not guarantee the actual
- // order of application however, because the oplog applier applies these operations in
- // parallel across several threads. The test accepts the possibility of a false negative
- // (test passes when it should fail) in favor of occasionally finding a true positive (test
- // fails as intended).
- std::vector<repl::OplogEntry> ops = {op0, op2, op1};
-
- DoNothingOplogApplierObserver observer;
- auto storageInterface = repl::StorageInterface::get(_opCtx);
- auto writerPool = repl::makeReplWriterPool();
- repl::OplogApplierImpl oplogApplier(
- nullptr, // task executor. not required for applyOplogBatch().
- nullptr, // oplog buffer. not required for applyOplogBatch().
- &observer,
- _coordinatorMock,
- _consistencyMarkers,
- storageInterface,
- repl::OplogApplier::Options(repl::OplogApplication::Mode::kRecovering),
- writerPool.get());
-
- uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops));
-
- AutoGetCollectionForRead autoColl(_opCtx, nss);
- auto wildcardIndexDescriptor =
- autoColl.getCollection()->getIndexCatalog()->findIndexByName(_opCtx, indexName);
- const IndexAccessMethod* wildcardIndexAccessMethod = autoColl.getCollection()
- ->getIndexCatalog()
- ->getEntry(wildcardIndexDescriptor)
- ->accessMethod();
- {
- // Verify that, even though op2 was applied first, the multikey state is observed in all
- // WiredTiger transactions that can contain the data written by op1.
- OneOffRead oor(_opCtx, updateTime1.asTimestamp());
- const WildcardAccessMethod* wam =
- dynamic_cast<const WildcardAccessMethod*>(wildcardIndexAccessMethod);
- MultikeyMetadataAccessStats stats;
- std::set<FieldRef> paths = getWildcardMultikeyPathSet(wam, _opCtx, &stats);
- ASSERT_EQUALS(1, paths.size());
- ASSERT_EQUALS("a", paths.begin()->dottedField());
- }
- {
- // Oplog application conservatively uses the first optime in the batch, insertTime0, as
- // the point at which the index became multikey, despite the fact that the earliest op
- // which caused the index to become multikey did not occur until updateTime1. This works
- // because if we construct a query plan that incorrectly believes a particular path to
- // be multikey, the plan will still be correct (if possibly sub-optimal). Conversely, if
- // we were to construct a query plan that incorrectly believes a path is NOT multikey,
- // it could produce incorrect results.
- OneOffRead oor(_opCtx, insertTime0.asTimestamp());
- const WildcardAccessMethod* wam =
- dynamic_cast<const WildcardAccessMethod*>(wildcardIndexAccessMethod);
- MultikeyMetadataAccessStats stats;
- std::set<FieldRef> paths = getWildcardMultikeyPathSet(wam, _opCtx, &stats);
- ASSERT_EQUALS(1, paths.size());
- ASSERT_EQUALS("a", paths.begin()->dottedField());
- }
- }
-};
-
-class InitialSyncSetIndexMultikeyOnInsert : public StorageTimestampTest {
-
-public:
- void run() {
- // Pretend to be a secondary.
- repl::UnreplicatedWritesBlock uwb(_opCtx);
-
- NamespaceString nss("unittests.InitialSyncSetIndexMultikeyOnInsert");
- reset(nss);
- UUID uuid = UUID::gen();
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- uuid = autoColl.getCollection()->uuid();
- }
- auto indexName = "a_1";
- auto indexSpec = BSON("name" << indexName << "key" << BSON("a" << 1) << "v"
- << static_cast<int>(kIndexVersion));
- ASSERT_OK(dbtests::createIndexFromSpec(_opCtx, nss.ns(), indexSpec));
-
- _coordinatorMock->alwaysAllowWrites(false);
- ASSERT_OK(_coordinatorMock->setFollowerMode({repl::MemberState::MS::RS_STARTUP2}));
-
- const LogicalTime pastTime = _clock->tickClusterTime(1);
- const LogicalTime insertTime0 = _clock->tickClusterTime(1);
- const LogicalTime indexBuildTime = _clock->tickClusterTime(1);
- const LogicalTime insertTime1 = _clock->tickClusterTime(1);
- const LogicalTime insertTime2 = _clock->tickClusterTime(1);
-
- BSONObj doc0 = BSON("_id" << 0 << "a" << 3);
- BSONObj doc1 = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2));
- BSONObj doc2 = BSON("_id" << 2 << "a" << BSON_ARRAY(1 << 2));
- auto op0 = repl::OplogEntry(
- BSON("ts" << insertTime0.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
- << "i"
- << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc0));
- auto op1 = repl::OplogEntry(
- BSON("ts" << insertTime1.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
- << "i"
- << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc1));
- auto op2 = repl::OplogEntry(
- BSON("ts" << insertTime2.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
- << "i"
- << "ns" << nss.ns() << "ui" << uuid << "wall" << Date_t() << "o" << doc2));
- auto indexSpec2 =
- BSON("createIndexes" << nss.coll() << "v" << static_cast<int>(kIndexVersion) << "key"
- << BSON("b" << 1) << "name"
- << "b_1");
- auto createIndexOp = repl::OplogEntry(
- BSON("ts" << indexBuildTime.asTimestamp() << "t" << 1LL << "v" << 2 << "op"
- << "c"
- << "ns" << nss.getCommandNS().ns() << "ui" << uuid << "wall" << Date_t()
- << "o" << indexSpec2));
-
- // We add in an index creation op to test that we restart tracking multikey path info
- // after bulk index builds.
- std::vector<repl::OplogEntry> ops = {op0, createIndexOp, op1, op2};
-
- DoNothingOplogApplierObserver observer;
- auto storageInterface = repl::StorageInterface::get(_opCtx);
- auto writerPool = repl::makeReplWriterPool();
-
- repl::OplogApplierImpl oplogApplier(
- nullptr, // task executor. not required for applyOplogBatch().
- nullptr, // oplog buffer. not required for applyOplogBatch().
- &observer,
- _coordinatorMock,
- _consistencyMarkers,
- storageInterface,
- repl::OplogApplier::Options(repl::OplogApplication::Mode::kInitialSync),
- writerPool.get());
- auto lastTime = unittest::assertGet(oplogApplier.applyOplogBatch(_opCtx, ops));
- ASSERT_EQ(lastTime.getTimestamp(), insertTime2.asTimestamp());
-
- // Wait for the index build to finish before making any assertions.
- IndexBuildsCoordinator::get(_opCtx)->awaitNoIndexBuildInProgressForCollection(_opCtx, uuid);
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
-
- // Ensure minimumVisible has not been updated due to the index creation.
- ASSERT_LT(autoColl.getCollection()->getMinimumVisibleSnapshot().get(),
- pastTime.asTimestamp());
-
- // Reading the multikey state before 'insertTime0' is not valid or reliable to test. If the
- // background index build intercepts and drains writes during inital sync, the index write
- // and the write to the multikey path state will not be timestamped. This write is not
- // timestamped because the lastApplied timestamp, which would normally be used on a primary
- // or secondary, is not always available during initial sync.
- // Additionally, it is not valid to read at a timestamp before inital sync completes, so
- // these assertions below only make sense in the context of this unit test, but would
- // otherwise not be exercised in any normal scenario.
- assertMultikeyPaths(
- _opCtx, autoColl.getCollection(), indexName, insertTime0.asTimestamp(), true, {{0}});
- assertMultikeyPaths(
- _opCtx, autoColl.getCollection(), indexName, insertTime1.asTimestamp(), true, {{0}});
- assertMultikeyPaths(
- _opCtx, autoColl.getCollection(), indexName, insertTime2.asTimestamp(), true, {{0}});
- }
-};
-
-class PrimarySetIndexMultikeyOnInsert : public StorageTimestampTest {
-
-public:
- void run() {
- NamespaceString nss("unittests.PrimarySetIndexMultikeyOnInsert");
- reset(nss);
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- auto indexName = "a_1";
- auto indexSpec = BSON("name" << indexName << "key" << BSON("a" << 1) << "v"
- << static_cast<int>(kIndexVersion));
- ASSERT_OK(dbtests::createIndexFromSpec(_opCtx, nss.ns(), indexSpec));
-
- const LogicalTime pastTime = _clock->tickClusterTime(1);
- const LogicalTime insertTime = pastTime.addTicks(1);
-
- BSONObj doc = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2));
- WriteUnitOfWork wunit(_opCtx);
- insertDocument(autoColl.getCollection(), InsertStatement(doc));
- wunit.commit();
-
- assertMultikeyPaths(
- _opCtx, autoColl.getCollection(), indexName, pastTime.asTimestamp(), false, {{}});
- assertMultikeyPaths(
- _opCtx, autoColl.getCollection(), indexName, insertTime.asTimestamp(), true, {{0}});
- }
-};
-
-class PrimarySetIndexMultikeyOnInsertUnreplicated : public StorageTimestampTest {
-
-public:
- void run() {
- // Use an unreplicated collection.
- NamespaceString nss("unittests.system.profile");
- reset(nss);
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- auto indexName = "a_1";
- auto indexSpec = BSON("name" << indexName << "key" << BSON("a" << 1) << "v"
- << static_cast<int>(kIndexVersion));
- ASSERT_OK(dbtests::createIndexFromSpec(_opCtx, nss.ns(), indexSpec));
-
- const LogicalTime pastTime = _clock->tickClusterTime(1);
- const LogicalTime insertTime = pastTime.addTicks(1);
-
- BSONObj doc = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2));
- WriteUnitOfWork wunit(_opCtx);
- insertDocument(autoColl.getCollection(), InsertStatement(doc));
- wunit.commit();
-
- assertMultikeyPaths(
- _opCtx, autoColl.getCollection(), indexName, pastTime.asTimestamp(), true, {{0}});
- assertMultikeyPaths(
- _opCtx, autoColl.getCollection(), indexName, insertTime.asTimestamp(), true, {{0}});
- }
-};
-
-class PrimarySetsMultikeyInsideMultiDocumentTransaction : public StorageTimestampTest {
-
-public:
- void run() {
- auto service = _opCtx->getServiceContext();
- auto sessionCatalog = SessionCatalog::get(service);
- sessionCatalog->reset_forTest();
- MongoDSessionCatalog::onStepUp(_opCtx);
-
- NamespaceString nss("unittests.PrimarySetsMultikeyInsideMultiDocumentTransaction");
- reset(nss);
-
- auto indexName = "a_1";
- auto indexSpec = BSON("name" << indexName << "ns" << nss.ns() << "key" << BSON("a" << 1)
- << "v" << static_cast<int>(kIndexVersion));
- auto doc = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2));
-
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- ASSERT_OK(dbtests::createIndexFromSpec(_opCtx, nss.ns(), indexSpec));
- }
-
- const auto currentTime = _clock->getTime();
- const auto presentTs = currentTime.clusterTime().asTimestamp();
-
- // This test does not run a real ReplicationCoordinator, so must advance the snapshot
- // manager manually.
- auto storageEngine = cc().getServiceContext()->getStorageEngine();
- storageEngine->getSnapshotManager()->setLastApplied(presentTs);
-
- const auto beforeTxnTime = _clock->tickClusterTime(1);
- auto beforeTxnTs = beforeTxnTime.asTimestamp();
- const auto multikeyNoopTime = beforeTxnTime.addTicks(1);
- auto multikeyNoopTs = multikeyNoopTime.asTimestamp();
- auto commitEntryTs = multikeyNoopTime.addTicks(1).asTimestamp();
-
- LOGV2(22502, "Present time", "timestamp"_attr = presentTs);
- LOGV2(22503, "Before transaction time", "timestamp"_attr = beforeTxnTs);
- LOGV2(4801000, "Multikey noop time", "timestamp"_attr = multikeyNoopTs);
- LOGV2(22504, "Commit entry time", "timestamp"_attr = commitEntryTs);
-
- const auto sessionId = makeLogicalSessionIdForTest();
- _opCtx->setLogicalSessionId(sessionId);
- _opCtx->setTxnNumber(1);
- _opCtx->setInMultiDocumentTransaction();
-
- // Check out the session.
- MongoDOperationContextSession ocs(_opCtx);
-
- auto txnParticipant = TransactionParticipant::get(_opCtx);
- ASSERT(txnParticipant);
-
- txnParticipant.beginOrContinue(
- _opCtx, {*_opCtx->getTxnNumber()}, false /* autocommit */, true /* startTransaction */);
- txnParticipant.unstashTransactionResources(_opCtx, "insert");
- {
- // Insert a document that will set the index as multikey.
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- insertDocument(autoColl.getCollection(), InsertStatement(doc));
- }
-
- txnParticipant.commitUnpreparedTransaction(_opCtx);
- txnParticipant.stashTransactionResources(_opCtx);
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- const auto& coll = autoColl.getCollection();
-
- // Make sure the transaction committed and its writes were timestamped correctly.
- assertDocumentAtTimestamp(coll, presentTs, BSONObj());
- assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
- assertDocumentAtTimestamp(coll, multikeyNoopTs, BSONObj());
- assertDocumentAtTimestamp(coll, commitEntryTs, doc);
- assertDocumentAtTimestamp(coll, nullTs, doc);
-
- // Make sure the multikey write was timestamped correctly. For correctness, the timestamp of
- // the write that sets the multikey flag to true should be less than or equal to the first
- // write that made the index multikey, which, in this case, is the commit timestamp of the
- // transaction. In other words, it is not incorrect to assign a timestamp that is too early,
- // but it is incorrect to assign a timestamp that is too late. In this specific case, we
- // expect the write to be timestamped at the logical clock tick directly preceding the
- // commit time.
- assertMultikeyPaths(_opCtx, coll, indexName, presentTs, false /* shouldBeMultikey */, {{}});
- assertMultikeyPaths(
- _opCtx, coll, indexName, beforeTxnTs, false /* shouldBeMultikey */, {{}});
- assertMultikeyPaths(
- _opCtx, coll, indexName, multikeyNoopTs, true /* shouldBeMultikey */, {{0}});
- assertMultikeyPaths(
- _opCtx, coll, indexName, commitEntryTs, true /* shouldBeMultikey */, {{0}});
- assertMultikeyPaths(_opCtx, coll, indexName, nullTs, true /* shouldBeMultikey */, {{0}});
- }
-};
-
-class InitializeMinValid : public StorageTimestampTest {
-public:
- void run() {
- NamespaceString nss(repl::ReplicationConsistencyMarkersImpl::kDefaultMinValidNamespace);
- reset(nss);
-
- repl::ReplicationConsistencyMarkersImpl consistencyMarkers(
- repl::StorageInterface::get(_opCtx));
- consistencyMarkers.initializeMinValidDocument(_opCtx);
-
- repl::MinValidDocument expectedMinValid;
- expectedMinValid.setMinValidTerm(repl::OpTime::kUninitializedTerm);
- expectedMinValid.setMinValidTimestamp(nullTs);
-
- assertMinValidDocumentAtTimestamp(nss, nullTs, expectedMinValid);
- assertMinValidDocumentAtTimestamp(nss, pastTs, expectedMinValid);
- assertMinValidDocumentAtTimestamp(nss, presentTs, expectedMinValid);
- assertMinValidDocumentAtTimestamp(nss, futureTs, expectedMinValid);
- }
-};
-
-class SetMinValidInitialSyncFlag : public StorageTimestampTest {
-public:
- void run() {
- NamespaceString nss(repl::ReplicationConsistencyMarkersImpl::kDefaultMinValidNamespace);
- reset(nss);
-
- repl::ReplicationConsistencyMarkersImpl consistencyMarkers(
- repl::StorageInterface::get(_opCtx));
- ASSERT(consistencyMarkers.createInternalCollections(_opCtx).isOK());
- consistencyMarkers.initializeMinValidDocument(_opCtx);
- consistencyMarkers.setInitialSyncFlag(_opCtx);
-
- repl::MinValidDocument expectedMinValidWithSetFlag;
- expectedMinValidWithSetFlag.setMinValidTerm(repl::OpTime::kUninitializedTerm);
- expectedMinValidWithSetFlag.setMinValidTimestamp(nullTs);
- expectedMinValidWithSetFlag.setInitialSyncFlag(true);
-
- assertMinValidDocumentAtTimestamp(nss, nullTs, expectedMinValidWithSetFlag);
-
- consistencyMarkers.clearInitialSyncFlag(_opCtx);
-
- repl::MinValidDocument expectedMinValidWithUnsetFlag;
- expectedMinValidWithSetFlag.setMinValidTerm(repl::OpTime::kUninitializedTerm);
- expectedMinValidWithSetFlag.setMinValidTimestamp(nullTs);
-
- assertMinValidDocumentAtTimestamp(nss, nullTs, expectedMinValidWithUnsetFlag);
- }
-};
-
-/**
- * Asserts that setting and then clearing appliedThrough through the replication consistency
- * markers is reflected in the minValid document when read at the null (latest) timestamp.
- */
-class SetMinValidAppliedThrough : public StorageTimestampTest {
-public:
-    void run() {
-        NamespaceString nss(repl::ReplicationConsistencyMarkersImpl::kDefaultMinValidNamespace);
-        reset(nss);
-
-        repl::ReplicationConsistencyMarkersImpl consistencyMarkers(
-            repl::StorageInterface::get(_opCtx));
-        consistencyMarkers.initializeMinValidDocument(_opCtx);
-
-        // Freshly initialized minValid document: uninitialized term, null timestamp.
-        repl::MinValidDocument expectedMinValidInit;
-        expectedMinValidInit.setMinValidTerm(repl::OpTime::kUninitializedTerm);
-        expectedMinValidInit.setMinValidTimestamp(nullTs);
-        assertMinValidDocumentAtTimestamp(nss, nullTs, expectedMinValidInit);
-
-        consistencyMarkers.setAppliedThrough(_opCtx, repl::OpTime(presentTs, presentTerm));
-
-        // After setAppliedThrough, only the appliedThrough field should differ from the
-        // initialized document.
-        repl::MinValidDocument expectedMinValidPresent;
-        expectedMinValidPresent.setMinValidTerm(repl::OpTime::kUninitializedTerm);
-        expectedMinValidPresent.setMinValidTimestamp(nullTs);
-        expectedMinValidPresent.setAppliedThrough(repl::OpTime(presentTs, presentTerm));
-
-        assertMinValidDocumentAtTimestamp(nss, nullTs, expectedMinValidPresent);
-
-        // appliedThrough opTime can be unset; clearing it returns the document to its
-        // initialized state.
-        consistencyMarkers.clearAppliedThrough(_opCtx);
-        assertMinValidDocumentAtTimestamp(nss, nullTs, expectedMinValidInit);
-    }
-};
-
-/**
- * This KVDropDatabase test only exists in this file for historical reasons, the final phase of
- * timestamping `dropDatabase` side-effects no longer applies. The purpose of this test is to
- * exercise the `StorageEngine::dropDatabase` method.
- */
-template <bool SimulatePrimary>
-class KVDropDatabase : public StorageTimestampTest {
-public:
-    void run() {
-        auto storageInterface = repl::StorageInterface::get(_opCtx);
-        repl::DropPendingCollectionReaper::set(
-            _opCtx->getServiceContext(),
-            std::make_unique<repl::DropPendingCollectionReaper>(storageInterface));
-
-        auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
-        auto durableCatalog = storageEngine->getCatalog();
-
-        // Declare the database to be in a "synced" state, i.e: in steady-state replication.
-        Timestamp syncTime = _clock->tickClusterTime(1).asTimestamp();
-        invariant(!syncTime.isNull());
-        storageEngine->setInitialDataTimestamp(syncTime);
-
-        // This test drops collections piece-wise instead of having the "drop database" algorithm
-        // perform this walk. Defensively operate on a separate DB from the other tests to ensure
-        // no leftover collections carry-over.
-        const NamespaceString nss("unittestsDropDB.kvDropDatabase");
-        const NamespaceString sysProfile("unittestsDropDB.system.profile");
-
-        std::string collIdent;
-        std::string indexIdent;
-        std::string sysProfileIdent;
-        // `*.system.profile` does not have an `_id` index. Just create it to abide by the API. This
-        // value will be the empty string. Helper methods accommodate this.
-        std::string sysProfileIndexIdent;
-        for (auto& tuple : {std::tie(nss, collIdent, indexIdent),
-                            std::tie(sysProfile, sysProfileIdent, sysProfileIndexIdent)}) {
-            // NOTE(review): this locks the outer `nss` ("unittestsDropDB.kvDropDatabase") on
-            // both iterations; the shadowing `nss` bound from the tuple below is what actually
-            // gets created. Confirm the single lock is intentional.
-            AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
-
-            // Save the pre-state idents so we can capture the specific idents related to collection
-            // creation.
-            std::vector<std::string> origIdents = durableCatalog->getAllIdents(_opCtx);
-            const auto& nss = std::get<0>(tuple);
-
-            // Non-replicated namespaces are wrapped in an unreplicated writes block. This has the
-            // side-effect of not timestamping the collection creation.
-            repl::UnreplicatedWritesBlock notReplicated(_opCtx);
-            if (nss.isReplicated()) {
-                TimestampBlock tsBlock(_opCtx, _clock->tickClusterTime(1).asTimestamp());
-                reset(nss);
-            } else {
-                reset(nss);
-            }
-
-            // Bind the local values to the variables in the parent scope.
-            auto& collIdent = std::get<1>(tuple);
-            auto& indexIdent = std::get<2>(tuple);
-            std::tie(collIdent, indexIdent) =
-                getNewCollectionIndexIdent(durableCatalog, origIdents);
-        }
-
-        AutoGetCollection coll(_opCtx, nss, LockMode::MODE_X);
-        {
-            // Drop/rename `kvDropDatabase`. `system.profile` does not get dropped/renamed.
-            WriteUnitOfWork wuow(_opCtx);
-            Database* db = coll.getDb();
-            ASSERT_OK(db->dropCollection(_opCtx, nss));
-            wuow.commit();
-        }
-
-        // Reserve a tick, this represents a time after the rename in which the `kvDropDatabase`
-        // ident for `kvDropDatabase` still exists.
-        const Timestamp postRenameTime = _clock->tickClusterTime(1).asTimestamp();
-
-        // If the storage engine is managing drops internally, the ident should not be visible after
-        // a drop.
-        if (storageEngine->supportsPendingDrops()) {
-            assertIdentsMissingAtTimestamp(durableCatalog, collIdent, indexIdent, postRenameTime);
-        } else {
-            // The namespace has changed, but the ident still exists as-is after the rename.
-            assertIdentsExistAtTimestamp(durableCatalog, collIdent, indexIdent, postRenameTime);
-        }
-
-        const Timestamp dropTime = _clock->tickClusterTime(1).asTimestamp();
-        if (SimulatePrimary) {
-            ASSERT_OK(dropDatabaseForApplyOps(_opCtx, nss.db().toString()));
-        } else {
-            // Secondaries apply the drop as an unreplicated write inside a timestamp block.
-            repl::UnreplicatedWritesBlock uwb(_opCtx);
-            TimestampBlock ts(_opCtx, dropTime);
-            ASSERT_OK(dropDatabaseForApplyOps(_opCtx, nss.db().toString()));
-        }
-
-        // Assert that the idents do not exist.
-        assertIdentsMissingAtTimestamp(
-            durableCatalog, sysProfileIdent, sysProfileIndexIdent, Timestamp::max());
-        assertIdentsMissingAtTimestamp(durableCatalog, collIdent, indexIdent, Timestamp::max());
-
-        // dropDatabase must not timestamp the final write. The collection and index should seem
-        // to have never existed.
-        assertIdentsMissingAtTimestamp(durableCatalog, collIdent, indexIdent, syncTime);
-
-        // Reset initial data timestamp to avoid unintended storage engine timestamp side effects.
-        storageEngine->setInitialDataTimestamp(Timestamp(0, 0));
-    }
-};
-
-/**
- * This test asserts that the catalog updates that represent the beginning and end of an index
- * build are timestamped. Additionally, the index will be `multikey` and that catalog update that
- * finishes the index build will also observe the index is multikey.
- *
- * Primaries log no-ops when starting an index build to acquire a timestamp. A primary committing
- * an index build gets timestamped when the `createIndexes` command creates an oplog entry. That
- * step is mimicked here.
- *
- * Secondaries timestamp starting their index build by being in a `TimestampBlock` when the oplog
- * entry is processed. Secondaries will look at the logical clock when completing the index
- * build. This is safe so long as completion is not racing with secondary oplog application (i.e:
- * enforced via the parallel batch writer mode lock).
- */
-template <bool SimulatePrimary>
-class TimestampIndexBuilds : public StorageTimestampTest {
-public:
-    void run() {
-        const bool SimulateSecondary = !SimulatePrimary;
-        if (SimulateSecondary) {
-            // The MemberState is inspected during index builds to use a "ghost" write to timestamp
-            // index completion.
-            ASSERT_OK(_coordinatorMock->setFollowerMode({repl::MemberState::MS::RS_SECONDARY}));
-        }
-
-        auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
-        auto durableCatalog = storageEngine->getCatalog();
-
-        NamespaceString nss("unittests.timestampIndexBuilds");
-        reset(nss);
-
-        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
-        CollectionWriter coll(_opCtx, autoColl);
-
-        RecordId catalogId = autoColl.getCollection()->getCatalogId();
-
-        // Insert a document with an array value for `a` so the index built below is multikey.
-        const LogicalTime insertTimestamp = _clock->tickClusterTime(1);
-        {
-            WriteUnitOfWork wuow(_opCtx);
-            insertDocument(coll.get(),
-                           InsertStatement(BSON("_id" << 0 << "a" << BSON_ARRAY(1 << 2)),
-                                           insertTimestamp.asTimestamp(),
-                                           presentTerm));
-            wuow.commit();
-            ASSERT_EQ(1, itCount(autoColl.getCollection()));
-        }
-
-        // Save the pre-state idents so we can capture the specific ident related to index
-        // creation.
-        std::vector<std::string> origIdents = durableCatalog->getAllIdents(_opCtx);
-
-        // Build an index on `{a: 1}`. This index will be multikey.
-        MultiIndexBlock indexer;
-        ScopeGuard abortOnExit(
-            [&] { indexer.abortIndexBuild(_opCtx, coll, MultiIndexBlock::kNoopOnCleanUpFn); });
-        const LogicalTime beforeIndexBuild = _clock->tickClusterTime(2);
-        BSONObj indexInfoObj;
-        {
-            // Primaries do not have a wrapping `TimestampBlock`; secondaries do.
-            const Timestamp commitTimestamp =
-                SimulatePrimary ? Timestamp::min() : beforeIndexBuild.addTicks(1).asTimestamp();
-            TimestampBlock tsBlock(_opCtx, commitTimestamp);
-
-            // Secondaries will also be in an `UnreplicatedWritesBlock` that prevents the `logOp`
-            // from creating an entry.
-            boost::optional<repl::UnreplicatedWritesBlock> unreplicated;
-            if (SimulateSecondary) {
-                unreplicated.emplace(_opCtx);
-            }
-
-            auto swIndexInfoObj = indexer.init(
-                _opCtx,
-                coll,
-                {BSON("v" << 2 << "unique" << true << "name"
-                          << "a_1"
-                          << "key" << BSON("a" << 1))},
-                MultiIndexBlock::makeTimestampedIndexOnInitFn(_opCtx, autoColl.getCollection()));
-            ASSERT_OK(swIndexInfoObj.getStatus());
-            indexInfoObj = std::move(swIndexInfoObj.getValue()[0]);
-        }
-
-        const LogicalTime afterIndexInit = _clock->tickClusterTime(2);
-
-        // Inserting all the documents has the side-effect of setting internal state on the index
-        // builder that the index is multikey.
-        ASSERT_OK(indexer.insertAllDocumentsInCollection(_opCtx, autoColl.getCollection()));
-        ASSERT_OK(indexer.checkConstraints(_opCtx, autoColl.getCollection()));
-
-        {
-            WriteUnitOfWork wuow(_opCtx);
-            // All callers of `MultiIndexBlock::commit` are responsible for timestamping index
-            // completion. Primaries write an oplog entry. Secondaries explicitly set a
-            // timestamp.
-            ASSERT_OK(
-                indexer.commit(_opCtx,
-                               autoColl.getWritableCollection(_opCtx),
-                               [&](const BSONObj& indexSpec) {
-                                   if (SimulatePrimary) {
-                                       // The timestamping responsibility for each index is placed
-                                       // on the caller.
-                                       _opCtx->getServiceContext()->getOpObserver()->onCreateIndex(
-                                           _opCtx, nss, coll->uuid(), indexSpec, false);
-                                   } else {
-                                       const auto currentTime = _clock->getTime();
-                                       ASSERT_OK(_opCtx->recoveryUnit()->setTimestamp(
-                                           currentTime.clusterTime().asTimestamp()));
-                                   }
-                               },
-                               MultiIndexBlock::kNoopOnCommitFn));
-            wuow.commit();
-        }
-        abortOnExit.dismiss();
-
-        const Timestamp afterIndexBuild = _clock->tickClusterTime(1).asTimestamp();
-
-        const std::string indexIdent =
-            getNewIndexIdentAtTime(durableCatalog, origIdents, Timestamp::min());
-        assertIdentsMissingAtTimestamp(
-            durableCatalog, "", indexIdent, beforeIndexBuild.asTimestamp());
-
-        // Assert that the index entry exists after init and `ready: false`.
-        assertIdentsExistAtTimestamp(durableCatalog, "", indexIdent, afterIndexInit.asTimestamp());
-        {
-            ASSERT_FALSE(
-                getIndexMetaData(
-                    getMetaDataAtTime(durableCatalog, catalogId, afterIndexInit.asTimestamp()),
-                    "a_1")
-                    .ready);
-        }
-
-        // After the build completes, assert that the index is `ready: true` and multikey.
-        assertIdentsExistAtTimestamp(durableCatalog, "", indexIdent, afterIndexBuild);
-        {
-            auto indexMetaData = getIndexMetaData(
-                getMetaDataAtTime(durableCatalog, catalogId, afterIndexBuild), "a_1");
-            ASSERT(indexMetaData.ready);
-            ASSERT(indexMetaData.multikey);
-
-            ASSERT_EQ(std::size_t(1), indexMetaData.multikeyPaths.size());
-            const bool match = indexMetaData.multikeyPaths[0] == MultikeyComponents({0});
-            if (!match) {
-                FAIL(str::stream() << "Expected: [ [ 0 ] ] Actual: "
-                                   << dumpMultikeyPaths(indexMetaData.multikeyPaths));
-            }
-        }
-    }
-};
-
-/**
- * Exercises draining intercepted side-table writes during a hybrid index build and asserts,
- * at several read timestamps, whether the intercepted writes have been applied to the index.
- */
-template <bool SimulatePrimary>
-class TimestampIndexBuildDrain : public StorageTimestampTest {
-public:
-    void run() {
-        const bool SimulateSecondary = !SimulatePrimary;
-        if (SimulateSecondary) {
-            // The MemberState is inspected during index builds to use a "ghost" write to timestamp
-            // index completion.
-            ASSERT_OK(_coordinatorMock->setFollowerMode({repl::MemberState::MS::RS_SECONDARY}));
-        }
-
-        NamespaceString nss("unittests.timestampIndexBuildDrain");
-        reset(nss);
-
-        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
-        CollectionWriter collection(_opCtx, autoColl);
-
-        // Build an index on `{a: 1}`.
-        MultiIndexBlock indexer;
-        ScopeGuard abortOnExit([&] {
-            indexer.abortIndexBuild(_opCtx, collection, MultiIndexBlock::kNoopOnCleanUpFn);
-        });
-        const LogicalTime beforeIndexBuild = _clock->tickClusterTime(2);
-        BSONObj indexInfoObj;
-        {
-            // Primaries do not have a wrapping `TimestampBlock`; secondaries do.
-            const Timestamp commitTimestamp =
-                SimulatePrimary ? Timestamp::min() : beforeIndexBuild.addTicks(1).asTimestamp();
-            TimestampBlock tsBlock(_opCtx, commitTimestamp);
-
-            // Secondaries will also be in an `UnreplicatedWritesBlock` that prevents the `logOp`
-            // from creating an entry.
-            boost::optional<repl::UnreplicatedWritesBlock> unreplicated;
-            if (SimulateSecondary) {
-                unreplicated.emplace(_opCtx);
-            }
-
-            auto swIndexInfoObj = indexer.init(
-                _opCtx,
-                collection,
-                {BSON("v" << 2 << "unique" << true << "name"
-                          << "a_1"
-                          << "ns" << nss.ns() << "key" << BSON("a" << 1))},
-                MultiIndexBlock::makeTimestampedIndexOnInitFn(_opCtx, collection.get()));
-            ASSERT_OK(swIndexInfoObj.getStatus());
-            indexInfoObj = std::move(swIndexInfoObj.getValue()[0]);
-        }
-
-        const LogicalTime afterIndexInit = _clock->tickClusterTime(1);
-
-        // Insert a document that will be intercepted and need to be drained. This timestamp will
-        // become the lastApplied time.
-        const LogicalTime firstInsert = _clock->tickClusterTime(1);
-        {
-            WriteUnitOfWork wuow(_opCtx);
-            insertDocument(collection.get(),
-                           InsertStatement(BSON("_id" << 0 << "a" << 1),
-                                           firstInsert.asTimestamp(),
-                                           presentTerm));
-            wuow.commit();
-            ASSERT_EQ(1, itCount(autoColl.getCollection()));
-        }
-
-        // Index build drain will timestamp writes from the side table into the index with the
-        // lastApplied timestamp. This is because these writes are not associated with any specific
-        // oplog entry.
-        ASSERT_EQ(repl::ReplicationCoordinator::get(_opCtx->getServiceContext())
-                      ->getMyLastAppliedOpTime()
-                      .getTimestamp(),
-                  firstInsert.asTimestamp());
-
-        ASSERT_OK(indexer.drainBackgroundWrites(_opCtx,
-                                                RecoveryUnit::ReadSource::kNoTimestamp,
-                                                IndexBuildInterceptor::DrainYieldPolicy::kNoYield));
-
-        auto indexCatalog = autoColl.getCollection()->getIndexCatalog();
-        const IndexCatalogEntry* buildingIndex = indexCatalog->getEntry(
-            indexCatalog->findIndexByName(_opCtx, "a_1", /* includeUnfinished */ true));
-        ASSERT(buildingIndex);
-
-        {
-            // Before the drain, there are no writes to apply.
-            OneOffRead oor(_opCtx, afterIndexInit.asTimestamp());
-            ASSERT_TRUE(buildingIndex->indexBuildInterceptor()->areAllWritesApplied(_opCtx));
-        }
-
-        // Note: In this case, we can't observe a state where all writes are not applied, because
-        // the index build drain effectively rewrites history by retroactively committing the drain
-        // at the same time as the first insert, meaning there is no point-in-time with undrained
-        // writes. This is fine, as long as the drain does not commit at a time before this insert.
-
-        {
-            // At time of the first insert, all writes are applied.
-            OneOffRead oor(_opCtx, firstInsert.asTimestamp());
-            ASSERT_TRUE(buildingIndex->indexBuildInterceptor()->areAllWritesApplied(_opCtx));
-        }
-
-        // Insert a second document that will be intercepted and need to be drained.
-        const LogicalTime secondInsert = _clock->tickClusterTime(1);
-        {
-            WriteUnitOfWork wuow(_opCtx);
-            insertDocument(autoColl.getCollection(),
-                           InsertStatement(BSON("_id" << 1 << "a" << 2),
-                                           secondInsert.asTimestamp(),
-                                           presentTerm));
-            wuow.commit();
-            ASSERT_EQ(2, itCount(autoColl.getCollection()));
-        }
-
-        // Advance the lastApplied optime to observe a point before the drain where there are
-        // un-drained writes.
-        const LogicalTime afterSecondInsert = _clock->tickClusterTime(1);
-        setReplCoordAppliedOpTime(repl::OpTime(afterSecondInsert.asTimestamp(), presentTerm));
-
-        ASSERT_OK(indexer.drainBackgroundWrites(_opCtx,
-                                                RecoveryUnit::ReadSource::kNoTimestamp,
-                                                IndexBuildInterceptor::DrainYieldPolicy::kNoYield));
-
-        {
-            // At time of the second insert, there are un-drained writes.
-            OneOffRead oor(_opCtx, secondInsert.asTimestamp());
-            ASSERT_FALSE(buildingIndex->indexBuildInterceptor()->areAllWritesApplied(_opCtx));
-        }
-
-        {
-            // After the second insert, also the lastApplied time, all writes are applied.
-            OneOffRead oor(_opCtx, afterSecondInsert.asTimestamp());
-            ASSERT_TRUE(buildingIndex->indexBuildInterceptor()->areAllWritesApplied(_opCtx));
-        }
-
-        ASSERT_OK(indexer.checkConstraints(_opCtx, autoColl.getCollection()));
-
-        {
-            WriteUnitOfWork wuow(_opCtx);
-            // All callers of `MultiIndexBlock::commit` are responsible for timestamping index
-            // completion. Primaries write an oplog entry; secondaries set a timestamp explicitly.
-            ASSERT_OK(
-                indexer.commit(_opCtx,
-                               collection.get(),
-                               [&](const BSONObj& indexSpec) {
-                                   if (SimulatePrimary) {
-                                       // The timestamping responsibility for each index is placed
-                                       // on the caller.
-                                       _opCtx->getServiceContext()->getOpObserver()->onCreateIndex(
-                                           _opCtx, nss, collection.get()->uuid(), indexSpec, false);
-                                   } else {
-                                       const auto currentTime = _clock->getTime();
-                                       ASSERT_OK(_opCtx->recoveryUnit()->setTimestamp(
-                                           currentTime.clusterTime().asTimestamp()));
-                                   }
-                               },
-                               MultiIndexBlock::kNoopOnCommitFn));
-            wuow.commit();
-        }
-        abortOnExit.dismiss();
-    }
-};
-
-/**
- * Builds two indexes through the `createIndexes` command and asserts that the catalog reflects
- * `ready: false` at the startIndexBuild oplog entry's timestamp and `ready: true` at the
- * commitIndexBuild entry's timestamp.
- */
-class TimestampMultiIndexBuilds : public StorageTimestampTest {
-public:
-    void run() {
-        auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
-        auto durableCatalog = storageEngine->getCatalog();
-
-        // Create config.system.indexBuilds collection to store commit quorum value during index
-        // building.
-        ASSERT_OK(repl::StorageInterface::get(_opCtx)->dropCollection(
-            _opCtx, NamespaceString::kIndexBuildEntryNamespace));
-        ASSERT_OK(
-            createCollection(_opCtx,
-                             NamespaceString::kIndexBuildEntryNamespace.db().toString(),
-                             BSON("create" << NamespaceString::kIndexBuildEntryNamespace.coll())));
-
-        NamespaceString nss("unittests.timestampMultiIndexBuilds");
-        reset(nss);
-
-        std::vector<std::string> origIdents;
-        {
-            AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
-
-            const LogicalTime insertTimestamp = _clock->tickClusterTime(1);
-
-            WriteUnitOfWork wuow(_opCtx);
-            insertDocument(autoColl.getCollection(),
-                           InsertStatement(BSON("_id" << 0 << "a" << 1 << "b" << 2 << "c" << 3),
-                                           insertTimestamp.asTimestamp(),
-                                           presentTerm));
-            wuow.commit();
-            ASSERT_EQ(1, itCount(autoColl.getCollection()));
-
-            // Save the pre-state idents so we can capture the specific ident related to index
-            // creation.
-            origIdents = durableCatalog->getAllIdents(_opCtx);
-
-            // Ensure we have a committed snapshot to avoid ReadConcernMajorityNotAvailableYet
-            // error at the beginning of the collection scan phase.
-            auto snapshotManager = storageEngine->getSnapshotManager();
-            snapshotManager->setCommittedSnapshot(insertTimestamp.asTimestamp());
-        }
-
-        DBDirectClient client(_opCtx);
-        {
-            // Disable index build commit quorum as we don't have support of replication subsystem
-            // for voting.
-            auto index1 = BSON("v" << kIndexVersion << "key" << BSON("a" << 1) << "name"
-                                   << "a_1");
-            auto index2 = BSON("v" << kIndexVersion << "key" << BSON("b" << 1) << "name"
-                                   << "b_1");
-            auto createIndexesCmdObj =
-                BSON("createIndexes" << nss.coll() << "indexes" << BSON_ARRAY(index1 << index2)
-                                     << "commitQuorum" << 0);
-            BSONObj result;
-            ASSERT(client.runCommand(nss.db().toString(), createIndexesCmdObj, result)) << result;
-        }
-
-        // Locate the timestamps of the startIndexBuild and commitIndexBuild oplog entries.
-        auto indexCreateInitTs = queryOplog(BSON("op"
-                                                 << "c"
-                                                 << "o.startIndexBuild" << nss.coll()
-                                                 << "o.indexes.0.name"
-                                                 << "a_1"))["ts"]
-                                     .timestamp();
-        auto commitIndexBuildTs = queryOplog(BSON("op"
-                                                  << "c"
-                                                  << "o.commitIndexBuild" << nss.coll()
-                                                  << "o.indexes.0.name"
-                                                  << "a_1"))["ts"]
-                                      .timestamp();
-        auto indexBComplete = commitIndexBuildTs;
-
-        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_S);
-        RecordId catalogId = autoColl.getCollection()->getCatalogId();
-
-        // The idents are created and persisted with the "ready: false" write.
-        // There should be two new index idents visible at this time.
-        const std::vector<std::string> indexes =
-            getNewIndexIdentsAtTime(durableCatalog, origIdents, indexCreateInitTs);
-        ASSERT_EQ(static_cast<std::size_t>(2), indexes.size()) << " Num idents: " << indexes.size();
-
-        ASSERT_FALSE(
-            getIndexMetaData(getMetaDataAtTime(durableCatalog, catalogId, indexCreateInitTs), "a_1")
-                .ready);
-        ASSERT_FALSE(
-            getIndexMetaData(getMetaDataAtTime(durableCatalog, catalogId, indexCreateInitTs), "b_1")
-                .ready);
-
-        // Assert the `a_1` and `b_1` indexes become ready at the last oplog entry time.
-        ASSERT_TRUE(
-            getIndexMetaData(getMetaDataAtTime(durableCatalog, catalogId, indexBComplete), "a_1")
-                .ready);
-        ASSERT_TRUE(
-            getIndexMetaData(getMetaDataAtTime(durableCatalog, catalogId, indexBComplete), "b_1")
-                .ready);
-
-        // Assert that the index build is removed from config.system.indexBuilds collection after
-        // completion.
-        {
-            AutoGetCollectionForRead collection(_opCtx, NamespaceString::kIndexBuildEntryNamespace);
-            ASSERT_TRUE(collection);
-
-            // At the commitIndexBuild entry time, the index build will still be present in the
-            // indexBuilds collection.
-            {
-                OneOffRead oor(_opCtx, indexBComplete);
-                // Fails if the collection is empty.
-                findOne(collection.getCollection());
-            }
-
-            // After the index build has finished, we should not see the doc in the indexBuilds
-            // collection.
-            ASSERT_EQUALS(0, itCount(collection.getCollection()));
-        }
-    }
-};
-
-/**
- * Builds two indexes, renames the collection over an existing target with `dropTarget: true`,
- * and asserts the new collection/index idents and index readiness at the timestamps of the
- * temporary collection's create and createIndexes oplog entries.
- */
-class TimestampMultiIndexBuildsDuringRename : public StorageTimestampTest {
-public:
-    void run() {
-        auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
-        auto durableCatalog = storageEngine->getCatalog();
-
-        NamespaceString nss("unittests.timestampMultiIndexBuildsDuringRename");
-        reset(nss);
-
-        {
-            AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
-
-            const LogicalTime insertTimestamp = _clock->tickClusterTime(1);
-
-            WriteUnitOfWork wuow(_opCtx);
-            insertDocument(autoColl.getCollection(),
-                           InsertStatement(BSON("_id" << 0 << "a" << 1 << "b" << 2 << "c" << 3),
-                                           insertTimestamp.asTimestamp(),
-                                           presentTerm));
-            wuow.commit();
-            ASSERT_EQ(1, itCount(autoColl.getCollection()));
-
-            // Ensure we have a committed snapshot to avoid ReadConcernMajorityNotAvailableYet
-            // error at the beginning of the collection scan phase.
-            auto snapshotManager = storageEngine->getSnapshotManager();
-            snapshotManager->setCommittedSnapshot(insertTimestamp.asTimestamp());
-        }
-
-        DBDirectClient client(_opCtx);
-        {
-            // Disable index build commit quorum as we don't have support of replication subsystem
-            // for voting.
-            auto index1 = BSON("v" << kIndexVersion << "key" << BSON("a" << 1) << "name"
-                                   << "a_1");
-            auto index2 = BSON("v" << kIndexVersion << "key" << BSON("b" << 1) << "name"
-                                   << "b_1");
-            auto createIndexesCmdObj =
-                BSON("createIndexes" << nss.coll() << "indexes" << BSON_ARRAY(index1 << index2)
-                                     << "commitQuorum" << 0);
-            BSONObj result;
-            ASSERT(client.runCommand(nss.db().toString(), createIndexesCmdObj, result)) << result;
-        }
-
-        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
-
-        NamespaceString renamedNss("unittestsRename.timestampMultiIndexBuildsDuringRename");
-        reset(renamedNss);
-
-        // Save the pre-state idents so we can capture the specific ident related to index
-        // creation.
-        std::vector<std::string> origIdents = durableCatalog->getAllIdents(_opCtx);
-
-        // Rename collection.
-        BSONObj renameResult;
-        ASSERT(client.runCommand(
-            "admin",
-            BSON("renameCollection" << nss.ns() << "to" << renamedNss.ns() << "dropTarget" << true),
-            renameResult))
-            << renameResult;
-
-        NamespaceString tmpName;
-        Timestamp indexCommitTs;
-        // Empty temporary collections generate createIndexes oplog entry even if the node
-        // supports 2 phase index build.
-        const auto createIndexesDocument =
-            queryOplog(BSON("ns" << renamedNss.db() + ".$cmd"
-                                 << "o.createIndexes" << BSON("$exists" << true) << "o.name"
-                                 << "b_1"));
-        const auto tmpCollName =
-            createIndexesDocument.getObjectField("o").getStringField("createIndexes");
-        tmpName = NamespaceString(renamedNss.db(), tmpCollName);
-        indexCommitTs = createIndexesDocument["ts"].timestamp();
-        const Timestamp indexCreateInitTs = queryOplog(BSON("op"
-                                                            << "c"
-                                                            << "o.create" << tmpName.coll()))["ts"]
-                                                .timestamp();
-
-
-        // We expect one new collection ident and one new index ident (the _id index) during this
-        // rename.
-        assertRenamedCollectionIdentsAtTimestamp(
-            durableCatalog, origIdents, /*expectedNewIndexIdents*/ 1, indexCreateInitTs);
-
-        // We expect one new collection ident and three new index idents (including the _id index)
-        // after this rename. The a_1 and b_1 index idents are created and persisted with the
-        // "ready: true" write.
-        assertRenamedCollectionIdentsAtTimestamp(
-            durableCatalog, origIdents, /*expectedNewIndexIdents*/ 3, indexCommitTs);
-
-        // Assert the 'a_1' and `b_1` indexes become ready at the last oplog entry time.
-        RecordId renamedCatalogId = CollectionCatalog::get(_opCtx)
-                                        ->lookupCollectionByNamespace(_opCtx, renamedNss)
-                                        ->getCatalogId();
-        ASSERT_TRUE(getIndexMetaData(
-                        getMetaDataAtTime(durableCatalog, renamedCatalogId, indexCommitTs), "a_1")
-                        .ready);
-        ASSERT_TRUE(getIndexMetaData(
-                        getMetaDataAtTime(durableCatalog, renamedCatalogId, indexCommitTs), "b_1")
-                        .ready);
-    }
-};
-
-/**
- * This test asserts that the catalog updates that represent the beginning and end of an aborted
- * index build are timestamped. The oplog should contain two entries startIndexBuild and
- * abortIndexBuild. We will inspect the catalog at the timestamp corresponding to each of these
- * oplog entries.
- */
-class TimestampAbortIndexBuild : public StorageTimestampTest {
-public:
-    void run() {
-        auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
-        auto durableCatalog = storageEngine->getCatalog();
-
-        // Create config.system.indexBuilds collection to store commit quorum value during index
-        // building.
-        ASSERT_OK(repl::StorageInterface::get(_opCtx)->dropCollection(
-            _opCtx, NamespaceString::kIndexBuildEntryNamespace));
-        ASSERT_OK(
-            createCollection(_opCtx,
-                             NamespaceString::kIndexBuildEntryNamespace.db().toString(),
-                             BSON("create" << NamespaceString::kIndexBuildEntryNamespace.coll())));
-
-        NamespaceString nss("unittests.timestampAbortIndexBuild");
-        reset(nss);
-
-        std::vector<std::string> origIdents;
-        {
-            AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
-
-            auto insertTimestamp1 = _clock->tickClusterTime(1);
-            auto insertTimestamp2 = _clock->tickClusterTime(1);
-
-            // Insert two documents with the same value for field 'a' so that
-            // we will fail to create a unique index.
-            WriteUnitOfWork wuow(_opCtx);
-            insertDocument(autoColl.getCollection(),
-                           InsertStatement(BSON("_id" << 0 << "a" << 1),
-                                           insertTimestamp1.asTimestamp(),
-                                           presentTerm));
-            insertDocument(autoColl.getCollection(),
-                           InsertStatement(BSON("_id" << 1 << "a" << 1),
-                                           insertTimestamp2.asTimestamp(),
-                                           presentTerm));
-            wuow.commit();
-            ASSERT_EQ(2, itCount(autoColl.getCollection()));
-
-            // Save the pre-state idents so we can capture the specific ident related to index
-            // creation.
-            origIdents = durableCatalog->getAllIdents(_opCtx);
-
-            // Ensure we have a committed snapshot to avoid ReadConcernMajorityNotAvailableYet
-            // error at the beginning of the collection scan phase.
-            auto snapshotManager = storageEngine->getSnapshotManager();
-            snapshotManager->setCommittedSnapshot(insertTimestamp2.asTimestamp());
-        }
-
-        {
-            // Disable index build commit quorum as we don't have support of replication subsystem
-            // for voting.
-            auto index1 = BSON("v" << kIndexVersion << "key" << BSON("a" << 1) << "name"
-                                   << "a_1"
-                                   << "unique" << true);
-            auto createIndexesCmdObj =
-                BSON("createIndexes" << nss.coll() << "indexes" << BSON_ARRAY(index1)
-                                     << "commitQuorum" << 0);
-
-            DBDirectClient client(_opCtx);
-            BSONObj result;
-            ASSERT_FALSE(client.runCommand(nss.db().toString(), createIndexesCmdObj, result));
-            ASSERT_EQUALS(ErrorCodes::DuplicateKey, getStatusFromCommandResult(result));
-        }
-
-        // Confirm that startIndexBuild and abortIndexBuild oplog entries have been written to the
-        // oplog.
-        auto indexStartDocument =
-            queryOplog(BSON("ns" << nss.db() + ".$cmd"
-                                 << "o.startIndexBuild" << nss.coll() << "o.indexes.0.name"
-                                 << "a_1"));
-        auto indexStartTs = indexStartDocument["ts"].timestamp();
-        auto indexAbortDocument =
-            queryOplog(BSON("ns" << nss.db() + ".$cmd"
-                                 << "o.abortIndexBuild" << nss.coll() << "o.indexes.0.name"
-                                 << "a_1"));
-        auto indexAbortTs = indexAbortDocument["ts"].timestamp();
-
-        // Check index state in catalog at oplog entry times for both startIndexBuild and
-        // abortIndexBuild.
-        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
-        RecordId catalogId = autoColl.getCollection()->getCatalogId();
-
-        // We expect one new index ident during this index build.
-        assertRenamedCollectionIdentsAtTimestamp(
-            durableCatalog, origIdents, /*expectedNewIndexIdents*/ 1, indexStartTs);
-        ASSERT_FALSE(
-            getIndexMetaData(getMetaDataAtTime(durableCatalog, catalogId, indexStartTs), "a_1")
-                .ready);
-
-        // We expect all new idents to be removed after the index build has aborted.
-        assertRenamedCollectionIdentsAtTimestamp(
-            durableCatalog, origIdents, /*expectedNewIndexIdents*/ 0, indexAbortTs);
-        assertIndexMetaDataMissing(getMetaDataAtTime(durableCatalog, catalogId, indexAbortTs),
-                                   "a_1");
-
-        // Assert that the index build is removed from config.system.indexBuilds collection after
-        // completion.
-        {
-            AutoGetCollectionForRead collection(_opCtx, NamespaceString::kIndexBuildEntryNamespace);
-            ASSERT_TRUE(collection);
-
-            // At the abortIndexBuild entry time, the index build will still be present in the
-            // indexBuilds collection.
-            {
-                OneOffRead oor(_opCtx, indexAbortTs);
-                // Fails if the collection is empty.
-                findOne(collection.getCollection());
-            }
-
-            // After the index build has finished, we should not see the doc in the indexBuilds
-            // collection.
-            ASSERT_EQUALS(0, itCount(collection.getCollection()));
-        }
-    }
-};
-
-/**
- * Creates three indexes, drops them all via `dropIndexes("*")`, and asserts that each index
- * ident becomes visible at its own creation timestamp and disappears at its own drop timestamp.
- */
-class TimestampIndexDropsWildcard : public StorageTimestampTest {
-public:
-    void run() {
-        auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
-        auto durableCatalog = storageEngine->getCatalog();
-
-        NamespaceString nss("unittests.timestampIndexDrops");
-        reset(nss);
-
-        AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
-        CollectionWriter coll(_opCtx, autoColl);
-
-        const LogicalTime insertTimestamp = _clock->tickClusterTime(1);
-        {
-            WriteUnitOfWork wuow(_opCtx);
-            insertDocument(autoColl.getCollection(),
-                           InsertStatement(BSON("_id" << 0 << "a" << 1 << "b" << 2 << "c" << 3),
-                                           insertTimestamp.asTimestamp(),
-                                           presentTerm));
-            wuow.commit();
-            ASSERT_EQ(1, itCount(autoColl.getCollection()));
-        }
-
-
-        const Timestamp beforeIndexBuild = _clock->tickClusterTime(1).asTimestamp();
-
-        // Save the pre-state idents so we can capture the specific ident related to index
-        // creation.
-        std::vector<std::string> origIdents = durableCatalog->getAllIdents(_opCtx);
-
-        std::vector<Timestamp> afterCreateTimestamps;
-        std::vector<std::string> indexIdents;
-        // Create an index and get the ident for each index.
-        for (auto key : {"a", "b", "c"}) {
-            createIndex(coll, str::stream() << key << "_1", BSON(key << 1));
-
-            // Timestamps at the completion of each index build.
-            afterCreateTimestamps.push_back(_clock->tickClusterTime(1).asTimestamp());
-
-            // Add the new ident to the vector and reset the current idents.
-            indexIdents.push_back(
-                getNewIndexIdentAtTime(durableCatalog, origIdents, Timestamp::min()));
-            origIdents = durableCatalog->getAllIdents(_opCtx);
-        }
-
-        // Ensure each index is visible at the correct timestamp, and not before.
-        for (size_t i = 0; i < indexIdents.size(); i++) {
-            auto beforeTs = (i == 0) ? beforeIndexBuild : afterCreateTimestamps[i - 1];
-            assertIdentsMissingAtTimestamp(durableCatalog, "", indexIdents[i], beforeTs);
-            assertIdentsExistAtTimestamp(
-                durableCatalog, "", indexIdents[i], afterCreateTimestamps[i]);
-        }
-
-        const auto currentTime = _clock->getTime();
-        const LogicalTime beforeDropTs = currentTime.clusterTime();
-
-        // Drop all of the indexes.
-        dropIndexes(_opCtx, nss, boost::none, "*");
-
-        // Assert that each index is dropped individually and with its own timestamp. The order of
-        // dropping and creating are not guaranteed to be the same, but assert all of the created
-        // indexes were also dropped.
-        size_t nIdents = indexIdents.size();
-        for (size_t i = 0; i < nIdents; i++) {
-            OneOffRead oor(_opCtx, beforeDropTs.addTicks(i + 1).asTimestamp());
-
-            // erase(remove(...)) removes the single matching ident from the tracking vector.
-            auto ident = getDroppedIndexIdent(durableCatalog, origIdents);
-            indexIdents.erase(std::remove(indexIdents.begin(), indexIdents.end(), ident));
-
-            origIdents = durableCatalog->getAllIdents(_opCtx);
-        }
-        ASSERT_EQ(indexIdents.size(), 0ul) << "Dropped idents should match created idents";
-    }
-};
-
-class TimestampIndexDropsListed : public StorageTimestampTest {
-public:
- void run() {
- auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
- auto durableCatalog = storageEngine->getCatalog();
-
- NamespaceString nss("unittests.timestampIndexDrops");
- reset(nss);
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
- CollectionWriter coll(_opCtx, autoColl);
-
- const LogicalTime insertTimestamp = _clock->tickClusterTime(1);
- {
- WriteUnitOfWork wuow(_opCtx);
- insertDocument(autoColl.getCollection(),
- InsertStatement(BSON("_id" << 0 << "a" << 1 << "b" << 2 << "c" << 3),
- insertTimestamp.asTimestamp(),
- presentTerm));
- wuow.commit();
- ASSERT_EQ(1, itCount(autoColl.getCollection()));
- }
-
-
- const Timestamp beforeIndexBuild = _clock->tickClusterTime(1).asTimestamp();
-
- // Save the pre-state idents so we can capture the specific ident related to index
- // creation.
- std::vector<std::string> origIdents = durableCatalog->getAllIdents(_opCtx);
-
- std::vector<Timestamp> afterCreateTimestamps;
- std::vector<std::string> indexIdents;
- // Create an index and get the ident for each index.
- for (auto key : {"a", "b", "c"}) {
- createIndex(coll, str::stream() << key << "_1", BSON(key << 1));
-
- // Timestamps at the completion of each index build.
- afterCreateTimestamps.push_back(_clock->tickClusterTime(1).asTimestamp());
-
- // Add the new ident to the vector and reset the current idents.
- indexIdents.push_back(
- getNewIndexIdentAtTime(durableCatalog, origIdents, Timestamp::min()));
- origIdents = durableCatalog->getAllIdents(_opCtx);
- }
-
- // Ensure each index is visible at the correct timestamp, and not before.
- for (size_t i = 0; i < indexIdents.size(); i++) {
- auto beforeTs = (i == 0) ? beforeIndexBuild : afterCreateTimestamps[i - 1];
- assertIdentsMissingAtTimestamp(durableCatalog, "", indexIdents[i], beforeTs);
- assertIdentsExistAtTimestamp(
- durableCatalog, "", indexIdents[i], afterCreateTimestamps[i]);
- }
-
- const auto currentTime = _clock->getTime();
- const LogicalTime beforeDropTs = currentTime.clusterTime();
-
- // Drop all of the indexes.
- dropIndexes(_opCtx, nss, boost::none, std::vector<std::string>{"a_1", "b_1", "c_1"});
-
- // Assert that each index is dropped individually and with its own timestamp. The order of
- // dropping and creating are not guaranteed to be the same, but assert all of the created
- // indexes were also dropped.
- size_t nIdents = indexIdents.size();
- for (size_t i = 0; i < nIdents; i++) {
- OneOffRead oor(_opCtx, beforeDropTs.addTicks(i + 1).asTimestamp());
-
- auto ident = getDroppedIndexIdent(durableCatalog, origIdents);
- indexIdents.erase(std::remove(indexIdents.begin(), indexIdents.end(), ident));
-
- origIdents = durableCatalog->getAllIdents(_opCtx);
- }
- ASSERT_EQ(indexIdents.size(), 0ul) << "Dropped idents should match created idents";
- }
-};
-
-/**
- * Test specific OplogApplierImpl subclass that allows for custom applyOplogBatchPerWorker to be run
- * during multiApply.
- */
-class SecondaryReadsDuringBatchApplicationAreAllowedApplier : public repl::OplogApplierImpl {
-public:
- SecondaryReadsDuringBatchApplicationAreAllowedApplier(
- executor::TaskExecutor* executor,
- repl::OplogBuffer* oplogBuffer,
- Observer* observer,
- repl::ReplicationCoordinator* replCoord,
- repl::ReplicationConsistencyMarkers* consistencyMarkers,
- repl::StorageInterface* storageInterface,
- const OplogApplier::Options& options,
- ThreadPool* writerPool,
- OperationContext* opCtx,
- Promise<bool>* promise,
- stdx::future<bool>* taskFuture)
- : repl::OplogApplierImpl(executor,
- oplogBuffer,
- observer,
- replCoord,
- consistencyMarkers,
- storageInterface,
- options,
- writerPool),
- _testOpCtx(opCtx),
- _promise(promise),
- _taskFuture(taskFuture) {}
-
- Status applyOplogBatchPerWorker(OperationContext* opCtx,
- std::vector<const repl::OplogEntry*>* operationsToApply,
- WorkerMultikeyPathInfo* pathInfo,
- bool isDataConsistent) override;
-
-private:
- // Pointer to the test's op context. This is distinct from the op context used in
- // applyOplogBatchPerWorker.
- OperationContext* _testOpCtx;
- Promise<bool>* _promise;
- stdx::future<bool>* _taskFuture;
-};
-
-
-// This apply operation function will block until the reader has tried acquiring a collection lock.
-// This returns BadValue statuses instead of asserting so that the worker threads can cleanly exit
-// and this test case fails without crashing the entire suite.
-Status SecondaryReadsDuringBatchApplicationAreAllowedApplier::applyOplogBatchPerWorker(
- OperationContext* opCtx,
- std::vector<const repl::OplogEntry*>* operationsToApply,
- WorkerMultikeyPathInfo* pathInfo,
- const bool isDataConsistent) {
- if (!_testOpCtx->lockState()->isLockHeldForMode(resourceIdParallelBatchWriterMode, MODE_X)) {
- return {ErrorCodes::BadValue, "Batch applied was not holding PBWM lock in MODE_X"};
- }
-
- // Insert the document. A reader without a PBWM lock should not see it yet.
- const bool dataIsConsistent = true;
- auto status = OplogApplierImpl::applyOplogBatchPerWorker(
- opCtx, operationsToApply, pathInfo, dataIsConsistent);
- if (!status.isOK()) {
- return status;
- }
-
- // Signals the reader to acquire a collection read lock.
- _promise->emplaceValue(true);
-
- // Block while holding the PBWM lock until the reader is done.
- if (!_taskFuture->get()) {
- return {ErrorCodes::BadValue, "Client was holding PBWM lock in MODE_IS"};
- }
- return Status::OK();
-}
-
-class IndexBuildsResolveErrorsDuringStateChangeToPrimary : public StorageTimestampTest {
-public:
- void run() {
-
- NamespaceString nss("unittests.timestampIndexBuilds");
- reset(nss);
-
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
- CollectionWriter collection(_opCtx, autoColl);
-
- // Indexing of parallel arrays is not allowed, so these are deemed "bad".
- const auto badDoc1 =
- BSON("_id" << 0 << "a" << BSON_ARRAY(0 << 1) << "b" << BSON_ARRAY(0 << 1));
- const auto badDoc2 =
- BSON("_id" << 1 << "a" << BSON_ARRAY(2 << 3) << "b" << BSON_ARRAY(2 << 3));
- const auto badDoc3 =
- BSON("_id" << 2 << "a" << BSON_ARRAY(4 << 5) << "b" << BSON_ARRAY(4 << 5));
-
- // NOTE: This test does not test any timestamp reads.
- const LogicalTime insert1 = _clock->tickClusterTime(1);
- {
- LOGV2(22505, "inserting {badDoc1}", "badDoc1"_attr = badDoc1);
- WriteUnitOfWork wuow(_opCtx);
- insertDocument(collection.get(),
- InsertStatement(badDoc1, insert1.asTimestamp(), presentTerm));
- wuow.commit();
- }
-
- const LogicalTime insert2 = _clock->tickClusterTime(1);
- {
- LOGV2(22506, "inserting {badDoc2}", "badDoc2"_attr = badDoc2);
- WriteUnitOfWork wuow(_opCtx);
- insertDocument(collection.get(),
- InsertStatement(badDoc2, insert2.asTimestamp(), presentTerm));
- wuow.commit();
- }
-
- const IndexCatalogEntry* buildingIndex = nullptr;
- MultiIndexBlock indexer;
- ScopeGuard abortOnExit([&] {
- indexer.abortIndexBuild(_opCtx, collection, MultiIndexBlock::kNoopOnCleanUpFn);
- });
-
- // Provide a build UUID, indicating that this is a two-phase index build.
- const auto buildUUID = UUID::gen();
- indexer.setTwoPhaseBuildUUID(buildUUID);
-
- const LogicalTime indexInit = _clock->tickClusterTime(3);
-
- // First, simulate being a secondary. Indexing errors are ignored.
- {
- ASSERT_OK(_coordinatorMock->setFollowerMode({repl::MemberState::MS::RS_SECONDARY}));
- _coordinatorMock->alwaysAllowWrites(false);
- repl::UnreplicatedWritesBlock unreplicatedWrites(_opCtx);
-
- {
- TimestampBlock tsBlock(_opCtx, indexInit.asTimestamp());
-
- auto swSpecs = indexer.init(
- _opCtx,
- collection,
- {BSON("v" << 2 << "name"
- << "a_1_b_1"
- << "ns" << collection->ns().ns() << "key"
- << BSON("a" << 1 << "b" << 1))},
- MultiIndexBlock::makeTimestampedIndexOnInitFn(_opCtx, collection.get()));
- ASSERT_OK(swSpecs.getStatus());
- }
-
- auto indexCatalog = collection->getIndexCatalog();
- buildingIndex = indexCatalog->getEntry(
- indexCatalog->findIndexByName(_opCtx, "a_1_b_1", /* includeUnfinished */ true));
- ASSERT(buildingIndex);
-
- ASSERT_OK(indexer.insertAllDocumentsInCollection(_opCtx, collection.get()));
-
- ASSERT_TRUE(buildingIndex->indexBuildInterceptor()->areAllWritesApplied(_opCtx));
-
- // There should be one skipped record from the collection scan.
- ASSERT_FALSE(buildingIndex->indexBuildInterceptor()
- ->getSkippedRecordTracker()
- ->areAllRecordsApplied(_opCtx));
- }
-
- // As a primary, stop ignoring indexing errors.
- ASSERT_OK(_coordinatorMock->setFollowerMode({repl::MemberState::MS::RS_PRIMARY}));
-
- {
- // This write will not succeed because the node is a primary and the document is not
- // indexable.
- LOGV2(22507, "attempting to insert {badDoc3}", "badDoc3"_attr = badDoc3);
- WriteUnitOfWork wuow(_opCtx);
- ASSERT_THROWS_CODE(
- collection->insertDocument(
- _opCtx,
- InsertStatement(badDoc3, indexInit.addTicks(1).asTimestamp(), presentTerm),
- /* opDebug */ nullptr,
- /* noWarn */ false),
- DBException,
- ErrorCodes::CannotIndexParallelArrays);
- wuow.commit();
- }
-
- // There should skipped records from failed collection scans and writes.
- ASSERT_FALSE(
- buildingIndex->indexBuildInterceptor()->getSkippedRecordTracker()->areAllRecordsApplied(
- _opCtx));
- // This fails because the bad record is still invalid.
- auto status = indexer.retrySkippedRecords(_opCtx, collection.get());
- ASSERT_EQ(status.code(), ErrorCodes::CannotIndexParallelArrays);
-
- ASSERT_FALSE(
- buildingIndex->indexBuildInterceptor()->getSkippedRecordTracker()->areAllRecordsApplied(
- _opCtx));
- ASSERT_TRUE(buildingIndex->indexBuildInterceptor()->areAllWritesApplied(_opCtx));
-
- // Update one documents to be valid, and delete the other. These modifications are written
- // to the side writes table and must be drained.
- Helpers::upsert(_opCtx, collection->ns().ns(), BSON("_id" << 0 << "a" << 1 << "b" << 1));
- {
- RecordId badRecord = Helpers::findOne(
- _opCtx, collection.get(), BSON("_id" << 1), false /* requireIndex */);
- WriteUnitOfWork wuow(_opCtx);
- collection->deleteDocument(_opCtx, kUninitializedStmtId, badRecord, nullptr);
- wuow.commit();
- }
-
- ASSERT_FALSE(buildingIndex->indexBuildInterceptor()->areAllWritesApplied(_opCtx));
- ASSERT_OK(indexer.drainBackgroundWrites(_opCtx,
- RecoveryUnit::ReadSource::kNoTimestamp,
- IndexBuildInterceptor::DrainYieldPolicy::kNoYield));
-
-
- // This succeeds because the bad documents are now either valid or removed.
- ASSERT_OK(indexer.retrySkippedRecords(_opCtx, collection.get()));
- ASSERT_TRUE(
- buildingIndex->indexBuildInterceptor()->getSkippedRecordTracker()->areAllRecordsApplied(
- _opCtx));
- ASSERT_TRUE(buildingIndex->indexBuildInterceptor()->areAllWritesApplied(_opCtx));
- ASSERT_OK(indexer.checkConstraints(_opCtx, collection.get()));
-
- {
- WriteUnitOfWork wuow(_opCtx);
- ASSERT_OK(indexer.commit(
- _opCtx,
- collection.getWritableCollection(),
- [&](const BSONObj& indexSpec) {
- _opCtx->getServiceContext()->getOpObserver()->onCreateIndex(
- _opCtx, collection->ns(), collection->uuid(), indexSpec, false);
- },
- MultiIndexBlock::kNoopOnCommitFn));
- wuow.commit();
- }
- abortOnExit.dismiss();
- }
-};
-
-class SecondaryReadsDuringBatchApplicationAreAllowed : public StorageTimestampTest {
-public:
- void run() {
- ASSERT(_opCtx->getServiceContext()->getStorageEngine()->supportsReadConcernSnapshot());
-
- NamespaceString ns("unittest.secondaryReadsDuringBatchApplicationAreAllowed");
- reset(ns);
- UUID uuid = UUID::gen();
- {
- AutoGetCollectionForRead autoColl(_opCtx, ns);
- uuid = autoColl.getCollection()->uuid();
- ASSERT_EQ(itCount(autoColl.getCollection()), 0);
- }
-
- // Returns true when the batch has started, meaning the applier is holding the PBWM lock.
- // Will return false if the lock was not held.
- auto batchInProgress = makePromiseFuture<bool>();
- // Attempt to read when in the middle of a batch.
- stdx::packaged_task<bool()> task([&] {
- Client::initThread(getThreadName());
- auto readOp = cc().makeOperationContext();
-
- // Wait for the batch to start or fail.
- if (!batchInProgress.future.get()) {
- return false;
- }
- AutoGetCollectionForRead autoColl(readOp.get(), ns);
- return !readOp->lockState()->isLockHeldForMode(resourceIdParallelBatchWriterMode,
- MODE_IS);
- });
- auto taskFuture = task.get_future();
- stdx::thread taskThread{std::move(task)};
-
- ScopeGuard joinGuard([&] {
- batchInProgress.promise.emplaceValue(false);
- taskThread.join();
- });
-
- // Make a simple insert operation.
- BSONObj doc0 = BSON("_id" << 0 << "a" << 0);
- auto insertOp = repl::OplogEntry(BSON("ts" << futureTs << "t" << 1LL << "v" << 2 << "op"
- << "i"
- << "ns" << ns.ns() << "ui" << uuid << "wall"
- << Date_t() << "o" << doc0));
- DoNothingOplogApplierObserver observer;
- // Apply the operation.
- auto storageInterface = repl::StorageInterface::get(_opCtx);
- auto writerPool = repl::makeReplWriterPool(1);
- SecondaryReadsDuringBatchApplicationAreAllowedApplier oplogApplier(
- nullptr, // task executor. not required for multiApply().
- nullptr, // oplog buffer. not required for multiApply().
- &observer,
- _coordinatorMock,
- _consistencyMarkers,
- storageInterface,
- repl::OplogApplier::Options(repl::OplogApplication::Mode::kSecondary),
- writerPool.get(),
- _opCtx,
- &(batchInProgress.promise),
- &taskFuture);
- auto lastOpTime = unittest::assertGet(oplogApplier.applyOplogBatch(_opCtx, {insertOp}));
- ASSERT_EQ(insertOp.getOpTime(), lastOpTime);
-
- joinGuard.dismiss();
- taskThread.join();
-
- // Read on the local snapshot to verify the document was inserted.
- AutoGetCollectionForRead autoColl(_opCtx, ns);
- assertDocumentAtTimestamp(autoColl.getCollection(), futureTs, doc0);
- }
-};
-
-/**
- * This test exercises the code path in which a primary performs an index build via oplog
- * application of a createIndexes oplog entry. In this code path, a primary timestamps the
- * index build through applying the oplog entry, rather than creating an oplog entry.
- */
-class TimestampIndexOplogApplicationOnPrimary : public StorageTimestampTest {
-public:
- void run() {
- // Index builds expect a non-empty oplog and a valid committed snapshot.
- {
- Lock::GlobalLock lk(_opCtx, MODE_IX);
- WriteUnitOfWork wuow(_opCtx);
- auto service = _opCtx->getServiceContext();
- service->getOpObserver()->onOpMessage(_opCtx, BSONObj());
- wuow.commit();
-
- auto snapshotManager = service->getStorageEngine()->getSnapshotManager();
- auto lastAppliedOpTime =
- repl::ReplicationCoordinator::get(service)->getMyLastAppliedOpTime();
- snapshotManager->setCommittedSnapshot(lastAppliedOpTime.getTimestamp());
- }
-
- // In order for oplog application to assign timestamps, we must be in non-replicated mode
- // and disable document validation.
- repl::UnreplicatedWritesBlock uwb(_opCtx);
- DisableDocumentValidation validationDisabler(_opCtx);
-
- std::string dbName = "unittest";
- NamespaceString nss(dbName, "oplogApplicationOnPrimary");
- BSONObj doc = BSON("_id" << 1 << "field" << 1);
-
- const LogicalTime setupStart = _clock->tickClusterTime(1);
-
- UUID collUUID = UUID::gen();
- {
- // Create the collection and insert a document.
- reset(nss);
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- collUUID = autoColl.getCollection()->uuid();
- WriteUnitOfWork wuow(_opCtx);
- insertDocument(autoColl.getCollection(),
- InsertStatement(doc, setupStart.asTimestamp(), presentTerm));
- wuow.commit();
- }
-
-
- {
- // Sanity check everything exists.
- AutoGetCollectionForReadCommand coll(_opCtx, nss);
- ASSERT(coll);
-
- const auto currentTime = _clock->getTime();
- const auto presentTs = currentTime.clusterTime().asTimestamp();
- assertDocumentAtTimestamp(coll.getCollection(), presentTs, doc);
- }
-
- // Simulate a scenario where the node is a primary, but does not accept writes. This is
- // the only scenario in which a primary can do an index build via oplog application, since
- // the applyOps command no longer allows createIndexes (see SERVER-41554).
- _coordinatorMock->alwaysAllowWrites(false);
- {
- const auto beforeBuildTime = _clock->tickClusterTime(2);
- const auto startBuildTs = beforeBuildTime.addTicks(1).asTimestamp();
-
- // Grab the existing idents to identify the ident created by the index build.
- auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
- auto durableCatalog = storageEngine->getCatalog();
- std::vector<std::string> origIdents;
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
- origIdents = durableCatalog->getAllIdents(_opCtx);
- }
-
- auto keyPattern = BSON("field" << 1);
- auto startBuildOpTime = repl::OpTime(startBuildTs, presentTerm);
- UUID indexBuildUUID = UUID::gen();
-
- // Wait for the index build thread to start the collection scan before proceeding with
- // checking the catalog and applying the commitIndexBuild oplog entry.
- // There is a potential race between applying the commitIndexBuild oplog entry and the
- // transitioning the index build thread's ReplIndexBuildState from kSetup to
- // kInProgress. This is due to the commit retry logic using the ClockSourceMock, rather
- // than an actual system clock that advances automatically, through OperationContext's
- // waitFor() function.
- {
- FailPointEnableBlock fpb("hangAfterStartingIndexBuild");
-
- auto start = repl::makeStartIndexBuildOplogEntry(
- startBuildOpTime, nss, "field_1", keyPattern, collUUID, indexBuildUUID);
- const bool dataIsConsistent = true;
- ASSERT_OK(repl::applyOplogEntryOrGroupedInserts(
- _opCtx, &start, repl::OplogApplication::Mode::kSecondary, dataIsConsistent));
-
- // We cannot use the OperationContext to wait for the thread to reach the fail point
- // because it also uses the ClockSourceMock.
- fpb->waitForTimesEntered(Interruptible::notInterruptible(),
- fpb.initialTimesEntered() + 1);
- }
-
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
- const std::string indexIdent =
- getNewIndexIdentAtTime(durableCatalog, origIdents, Timestamp::min());
- assertIdentsMissingAtTimestamp(
- durableCatalog, "", indexIdent, beforeBuildTime.asTimestamp());
- assertIdentsExistAtTimestamp(durableCatalog, "", indexIdent, startBuildTs);
-
- // The index has not committed yet, so it is not ready.
- RecordId catalogId = autoColl.getCollection()->getCatalogId();
- ASSERT_FALSE(
- getIndexMetaData(getMetaDataAtTime(durableCatalog, catalogId, startBuildTs),
- "field_1")
- .ready);
- } // release read lock so commit index build oplog entry can take its own locks.
-
- auto commit = repl::makeCommitIndexBuildOplogEntry(
- startBuildOpTime, nss, "field_1", keyPattern, collUUID, indexBuildUUID);
- const bool dataIsConsistent = true;
- ASSERT_OK(repl::applyOplogEntryOrGroupedInserts(
- _opCtx, &commit, repl::OplogApplication::Mode::kSecondary, dataIsConsistent));
-
- // Reacquire read lock to check index metadata.
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
- RecordId catalogId = autoColl.getCollection()->getCatalogId();
- ASSERT_TRUE(getIndexMetaData(getMetaDataAtTime(durableCatalog, catalogId, startBuildTs),
- "field_1")
- .ready);
- }
- }
-};
-
-class ViewCreationSeparateTransaction : public StorageTimestampTest {
-public:
- void run() {
- auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
- auto durableCatalog = storageEngine->getCatalog();
-
- const NamespaceString backingCollNss("unittests.backingColl");
- reset(backingCollNss);
-
- const NamespaceString viewNss("unittests.view");
- const NamespaceString systemViewsNss("unittests.system.views");
-
- ASSERT_OK(createCollection(_opCtx,
- viewNss.db().toString(),
- BSON("create" << viewNss.coll() << "pipeline" << BSONArray()
- << "viewOn" << backingCollNss.coll())));
-
- const Timestamp systemViewsCreateTs = queryOplog(BSON("op"
- << "c"
- << "ns" << (viewNss.db() + ".$cmd")
- << "o.create"
- << "system.views"))["ts"]
- .timestamp();
- const Timestamp viewCreateTs = queryOplog(BSON("op"
- << "i"
- << "ns" << systemViewsNss.ns() << "o._id"
- << viewNss.ns()))["ts"]
- .timestamp();
-
- {
- Lock::GlobalRead read(_opCtx);
- AutoGetCollection autoColl(_opCtx, systemViewsNss, LockMode::MODE_IS);
- RecordId catalogId = autoColl.getCollection()->getCatalogId();
-
- auto systemViewsMd = getMetaDataAtTime(
- durableCatalog, catalogId, Timestamp(systemViewsCreateTs.asULL() - 1));
- ASSERT(systemViewsMd == nullptr)
- << systemViewsNss
- << " incorrectly exists before creation. CreateTs: " << systemViewsCreateTs;
-
- systemViewsMd = getMetaDataAtTime(durableCatalog, catalogId, systemViewsCreateTs);
- TenantNamespace tenantNs = systemViewsMd->tenantNs;
- ASSERT_EQ(systemViewsNss.ns(), tenantNs.getNss().ns());
-
- assertDocumentAtTimestamp(autoColl.getCollection(), systemViewsCreateTs, BSONObj());
- assertDocumentAtTimestamp(autoColl.getCollection(),
- viewCreateTs,
- BSON("_id" << viewNss.ns() << "viewOn"
- << backingCollNss.coll() << "pipeline"
- << BSONArray()));
- }
- }
-};
-
-class CreateCollectionWithSystemIndex : public StorageTimestampTest {
-public:
- void run() {
- // Only run on 'wiredTiger'. No other storage engines to-date support timestamp writes.
- if (!(mongo::storageGlobalParams.engine == "wiredTiger" &&
- mongo::serverGlobalParams.enableMajorityReadConcern)) {
- return;
- }
-
- NamespaceString nss("admin.system.users");
-
- { ASSERT_FALSE(AutoGetCollectionForReadCommand(_opCtx, nss).getCollection()); }
-
- ASSERT_OK(createCollection(_opCtx, nss.db().toString(), BSON("create" << nss.coll())));
-
- RecordId catalogId;
- {
- AutoGetCollectionForReadCommand coll(_opCtx, nss);
- ASSERT(coll.getCollection());
- catalogId = coll.getCollection()->getCatalogId();
- }
-
- BSONObj result = queryOplog(BSON("op"
- << "c"
- << "ns" << nss.getCommandNS().ns() << "o.create"
- << nss.coll()));
- repl::OplogEntry op(result);
- // The logOp() call for createCollection should have timestamp 'futureTs', which will also
- // be the timestamp at which we do the write which creates the collection. Thus we expect
- // the collection to appear at 'futureTs' and not before.
- ASSERT_EQ(op.getTimestamp(), futureTs) << op.toBSONForLogging();
-
- // The index build emits three oplog entries.
- Timestamp indexStartTs;
- Timestamp indexCreateTs;
- Timestamp indexCompleteTs;
- // Empty collections generate createIndexes oplog entry even if the node
- // supports 2 phase index build.
- indexStartTs = op.getTimestamp();
- indexCreateTs =
- repl::OplogEntry(queryOplog(BSON("op"
- << "c"
- << "ns" << nss.getCommandNS().ns() << "o.createIndexes"
- << nss.coll() << "o.name"
- << "user_1_db_1")))
- .getTimestamp();
- indexCompleteTs = indexCreateTs;
-
- assertNamespaceInIdents(nss, pastTs, false);
- assertNamespaceInIdents(nss, presentTs, false);
- assertNamespaceInIdents(nss, futureTs, true);
- assertNamespaceInIdents(nss, indexStartTs, true);
- if (!indexCreateTs.isNull()) {
- assertNamespaceInIdents(nss, indexCreateTs, true);
- }
- assertNamespaceInIdents(nss, indexCompleteTs, true);
- assertNamespaceInIdents(nss, nullTs, true);
-
- ASSERT_GT(indexCompleteTs, futureTs);
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
- auto storageEngine = _opCtx->getServiceContext()->getStorageEngine();
- auto durableCatalog = storageEngine->getCatalog();
- auto indexIdent = durableCatalog->getIndexIdent(_opCtx, catalogId, "user_1_db_1");
- assertIdentsMissingAtTimestamp(durableCatalog, "", indexIdent, pastTs);
- assertIdentsMissingAtTimestamp(durableCatalog, "", indexIdent, presentTs);
- assertIdentsMissingAtTimestamp(durableCatalog, "", indexIdent, futureTs);
- // This is the timestamp of the startIndexBuild oplog entry, which is timestamped before the
- // index is created as part of the createIndexes oplog entry.
- assertIdentsMissingAtTimestamp(durableCatalog, "", indexIdent, indexStartTs);
- if (!indexCreateTs.isNull()) {
- assertIdentsExistAtTimestamp(durableCatalog, "", indexIdent, indexCreateTs);
- }
- assertIdentsExistAtTimestamp(durableCatalog, "", indexIdent, indexCompleteTs);
- assertIdentsExistAtTimestamp(durableCatalog, "", indexIdent, nullTs);
- }
-};
-
-class RetryableFindAndModifyTest : public StorageTimestampTest {
-public:
- const StringData dbName = "unittest"_sd;
- const BSONObj oldObj = BSON("_id" << 0 << "a" << 1);
-
- RetryableFindAndModifyTest(const std::string& collName) : nss(dbName, collName) {
- auto service = _opCtx->getServiceContext();
- auto sessionCatalog = SessionCatalog::get(service);
- sessionCatalog->reset_forTest();
- MongoDSessionCatalog::onStepUp(_opCtx);
-
- reset(nss);
- UUID ui = UUID::gen();
-
- {
- AutoGetCollection coll(_opCtx, nss, LockMode::MODE_IX);
- ASSERT(coll);
- ui = coll->uuid();
- }
-
- const auto currentTime = _clock->getTime();
- currentTs = currentTime.clusterTime().asTimestamp();
- insertTs = currentTime.clusterTime().asTimestamp() + 1;
- beforeOplogTs = insertTs + 1;
- oplogTs = insertTs + 2;
- // This test does not run a real ReplicationCoordinator, so must advance the snapshot
- // manager manually.
- auto storageEngine = cc().getServiceContext()->getStorageEngine();
- storageEngine->getSnapshotManager()->setLastApplied(insertTs);
-
- const auto sessionId = makeLogicalSessionIdForTest();
- _opCtx->setLogicalSessionId(sessionId);
- const auto txnNumber = 10;
- _opCtx->setTxnNumber(txnNumber);
-
- ocs.emplace(_opCtx);
-
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- WriteUnitOfWork wunit(_opCtx);
- insertDocument(autoColl.getCollection(), InsertStatement(oldObj));
- wunit.commit();
- }
- assertOplogDocumentExistsAtTimestamp(BSON("ts" << insertTs << "op"
- << "i"),
- insertTs,
- true);
-
- storageEngine->getSnapshotManager()->setLastApplied(insertTs);
-
- auto txnParticipant = TransactionParticipant::get(_opCtx);
- ASSERT(txnParticipant);
- // Start a retryable write.
- txnParticipant.beginOrContinue(
- _opCtx, {txnNumber}, boost::none /* autocommit */, boost::none /* startTransaction */);
- }
-
-protected:
- NamespaceString nss;
- Timestamp currentTs;
- Timestamp insertTs;
- Timestamp beforeOplogTs;
- Timestamp oplogTs;
-
- boost::optional<MongoDOperationContextSession> ocs;
-};
-
-class RetryableFindAndModifyUpdate : RetryableFindAndModifyTest {
-public:
- RetryableFindAndModifyUpdate() : RetryableFindAndModifyTest("RetryableFindAndModifyUpdate") {}
-
- void run() {
- RAIIServerParameterControllerForTest ffRaii("featureFlagRetryableFindAndModify", true);
- RAIIServerParameterControllerForTest storeImageInSideCollection(
- "storeFindAndModifyImagesInSideCollection", true);
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
- CollectionWriter collection(_opCtx, autoColl);
- const auto newObj = BSON("_id" << 0 << "a" << 1 << "b" << 1);
- CollectionUpdateArgs args;
- args.stmtIds = {1};
- args.preImageDoc = oldObj;
- args.updatedDoc = newObj;
- args.storeDocOption = CollectionUpdateArgs::StoreDocOption::PreImage;
- args.update = BSON("$set" << BSON("b" << 1));
- args.criteria = BSON("_id" << 0);
-
- {
- auto cursor = collection->getCursor(_opCtx);
- auto record = cursor->next();
- invariant(record);
- WriteUnitOfWork wuow(_opCtx);
- collection->updateDocument(
- _opCtx,
- record->id,
- Snapshotted<BSONObj>(_opCtx->recoveryUnit()->getSnapshotId(), oldObj),
- newObj,
- false,
- nullptr,
- &args);
- wuow.commit();
- }
-
- // There should be no oplog entry at 'beforeOplogTs'.
- const auto beforeOplogTsFilter = BSON("ts" << beforeOplogTs);
- assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, currentTs, false);
- assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, beforeOplogTs, false);
- assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, oplogTs, false);
-
- const auto oplogTsFilter = BSON("ts" << oplogTs << "op"
- << "u");
- assertOplogDocumentExistsAtTimestamp(oplogTsFilter, currentTs, false);
- assertOplogDocumentExistsAtTimestamp(oplogTsFilter, beforeOplogTs, false);
- assertOplogDocumentExistsAtTimestamp(oplogTsFilter, oplogTs, true);
- }
-};
-
-class RetryableFindAndModifyUpdateWithDamages : RetryableFindAndModifyTest {
-public:
- RetryableFindAndModifyUpdateWithDamages()
- : RetryableFindAndModifyTest("RetryableFindAndModifyUpdateWithDamages") {}
-
- void run() {
- namespace mmb = mongo::mutablebson;
- RAIIServerParameterControllerForTest ffRaii("featureFlagRetryableFindAndModify", true);
- RAIIServerParameterControllerForTest storeImageInSideCollection(
- "storeFindAndModifyImagesInSideCollection", true);
- const auto bsonObj = BSON("_id" << 0 << "a" << 1);
- // Create a new document representing BSONObj with the above contents.
- mmb::Document doc(bsonObj, mmb::Document::kInPlaceEnabled);
-
- mmb::DamageVector damages;
- const char* source = nullptr;
- size_t size = 0;
- ASSERT_TRUE(doc.getInPlaceUpdates(&damages, &source, &size));
-
- // Enable in-place mutation for this document
- ASSERT_EQUALS(mmb::Document::kInPlaceEnabled, doc.getCurrentInPlaceMode());
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
- CollectionWriter collection(_opCtx, autoColl);
- const auto newObj = BSON("_id" << 0 << "a" << 0);
- CollectionUpdateArgs args;
- args.stmtIds = {1};
- args.preImageDoc = oldObj;
- args.updatedDoc = newObj;
- args.storeDocOption = CollectionUpdateArgs::StoreDocOption::PreImage;
- args.update = BSON("$set" << BSON("a" << 0));
- args.criteria = BSON("_id" << 0);
-
- {
- Snapshotted<BSONObj> objSnapshot(_opCtx->recoveryUnit()->getSnapshotId(), oldObj);
- const RecordData oldRec(objSnapshot.value().objdata(), objSnapshot.value().objsize());
- Snapshotted<RecordData> recordSnapshot(objSnapshot.snapshotId(), oldRec);
- auto cursor = collection->getCursor(_opCtx);
- auto record = cursor->next();
- invariant(record);
- WriteUnitOfWork wuow(_opCtx);
- const auto statusWith = collection->updateDocumentWithDamages(
- _opCtx, record->id, std::move(recordSnapshot), source, damages, &args);
- wuow.commit();
- ASSERT_OK(statusWith.getStatus());
- }
-
- // There should be no oplog entry at 'beforeOplogTs'.
- const auto beforeOplogTsFilter = BSON("ts" << beforeOplogTs);
- assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, currentTs, false);
- assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, beforeOplogTs, false);
- assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, oplogTs, false);
-
- const auto tsFilter = BSON("ts" << oplogTs << "op"
- << "u");
- assertOplogDocumentExistsAtTimestamp(tsFilter, currentTs, false);
- assertOplogDocumentExistsAtTimestamp(tsFilter, beforeOplogTs, false);
- assertOplogDocumentExistsAtTimestamp(tsFilter, oplogTs, true);
- }
-};
-
-class RetryableFindAndModifyDelete : RetryableFindAndModifyTest {
-public:
- RetryableFindAndModifyDelete() : RetryableFindAndModifyTest("RetryableFindAndModifyDelete") {}
-
- void run() {
- namespace mmb = mongo::mutablebson;
- RAIIServerParameterControllerForTest ffRaii("featureFlagRetryableFindAndModify", true);
- RAIIServerParameterControllerForTest storeImageInSideCollection(
- "storeFindAndModifyImagesInSideCollection", true);
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X);
- CollectionWriter collection(_opCtx, autoColl);
- const auto bsonObj = BSON("_id" << 0 << "a" << 1);
-
- {
- Snapshotted<BSONObj> objSnapshot(_opCtx->recoveryUnit()->getSnapshotId(), oldObj);
- auto cursor = collection->getCursor(_opCtx);
- auto record = cursor->next();
- invariant(record);
- WriteUnitOfWork wuow(_opCtx);
- collection->deleteDocument(_opCtx,
- objSnapshot,
- 1,
- record->id,
- nullptr,
- false,
- false,
- Collection::StoreDeletedDoc::On);
- wuow.commit();
- }
-
- // There should be no oplog entry at 'beforeOplogTs'.
- const auto beforeOplogTsFilter = BSON("ts" << beforeOplogTs);
- assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, currentTs, false);
- assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, beforeOplogTs, false);
- assertOplogDocumentExistsAtTimestamp(beforeOplogTsFilter, oplogTs, false);
-
- const auto tsFilter = BSON("ts" << oplogTs << "op"
- << "d");
- assertOplogDocumentExistsAtTimestamp(tsFilter, currentTs, false);
- assertOplogDocumentExistsAtTimestamp(tsFilter, beforeOplogTs, false);
- assertOplogDocumentExistsAtTimestamp(tsFilter, oplogTs, true);
- }
-};
-
-class MultiDocumentTransactionTest : public StorageTimestampTest {
-public:
- const StringData dbName = "unittest"_sd;
- const BSONObj doc = BSON("_id" << 1 << "TestValue" << 1);
- const BSONObj docKey = BSON("_id" << 1);
-
- MultiDocumentTransactionTest(const std::string& collName) : nss(dbName, collName) {
- auto service = _opCtx->getServiceContext();
- auto sessionCatalog = SessionCatalog::get(service);
- sessionCatalog->reset_forTest();
- MongoDSessionCatalog::onStepUp(_opCtx);
-
- reset(nss);
- UUID ui = UUID::gen();
- {
- AutoGetCollection coll(_opCtx, nss, LockMode::MODE_IX);
- ASSERT(coll);
- ui = coll->uuid();
- }
-
- const auto currentTime = _clock->getTime();
- presentTs = currentTime.clusterTime().asTimestamp();
- // This test does not run a real ReplicationCoordinator, so must advance the snapshot
- // manager manually.
- auto storageEngine = cc().getServiceContext()->getStorageEngine();
- storageEngine->getSnapshotManager()->setLastApplied(presentTs);
- const auto beforeTxnTime = _clock->tickClusterTime(1);
- beforeTxnTs = beforeTxnTime.asTimestamp();
- commitEntryTs = beforeTxnTime.addTicks(1).asTimestamp();
-
- const auto sessionId = makeLogicalSessionIdForTest();
- _opCtx->setLogicalSessionId(sessionId);
- _opCtx->setTxnNumber(26);
- _opCtx->setInMultiDocumentTransaction();
-
- ocs.emplace(_opCtx);
-
- auto txnParticipant = TransactionParticipant::get(_opCtx);
- ASSERT(txnParticipant);
-
- txnParticipant.beginOrContinue(
- _opCtx, {*_opCtx->getTxnNumber()}, false /* autocommit */, true /* startTransaction */);
- txnParticipant.unstashTransactionResources(_opCtx, "insert");
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- insertDocument(autoColl.getCollection(), InsertStatement(doc));
- }
- txnParticipant.stashTransactionResources(_opCtx);
-
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
- const auto& coll = autoColl.getCollection();
- assertDocumentAtTimestamp(coll, presentTs, BSONObj());
- assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
- assertDocumentAtTimestamp(coll, commitEntryTs, BSONObj());
- assertDocumentAtTimestamp(coll, nullTs, BSONObj());
-
- const auto commitFilter = BSON("ts" << commitEntryTs);
- assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, nullTs, false);
- }
- }
-
- void logTimestamps() const {
- LOGV2(22508, "Present TS: {presentTs}", "presentTs"_attr = presentTs);
- LOGV2(22509, "Before transaction TS: {beforeTxnTs}", "beforeTxnTs"_attr = beforeTxnTs);
- LOGV2(22510, "Commit entry TS: {commitEntryTs}", "commitEntryTs"_attr = commitEntryTs);
- }
-
- BSONObj getSessionTxnInfoAtTimestamp(const Timestamp& ts, bool expected) {
- AutoGetCollection autoColl(
- _opCtx, NamespaceString::kSessionTransactionsTableNamespace, LockMode::MODE_IX);
- const auto sessionId = *_opCtx->getLogicalSessionId();
- const auto txnNum = *_opCtx->getTxnNumber();
- BSONObj doc;
- OneOffRead oor(_opCtx, ts);
- bool found = Helpers::findOne(_opCtx,
- autoColl.getCollection(),
- BSON("_id" << sessionId.toBSON() << "txnNum" << txnNum),
- doc);
- if (expected) {
- ASSERT(found) << "Missing session transaction info at " << ts;
- } else {
- ASSERT_FALSE(found) << "Session transaction info at " << ts
- << " is unexpectedly present " << doc;
- }
- return doc;
- }
-
-protected:
- NamespaceString nss;
- Timestamp presentTs;
- Timestamp beforeTxnTs;
- Timestamp commitEntryTs;
-
- boost::optional<MongoDOperationContextSession> ocs;
-};
-
-class MultiDocumentTransaction : public MultiDocumentTransactionTest {
-public:
- MultiDocumentTransaction() : MultiDocumentTransactionTest("multiDocumentTransaction") {}
-
- void run() {
- auto txnParticipant = TransactionParticipant::get(_opCtx);
- ASSERT(txnParticipant);
- logTimestamps();
-
- txnParticipant.unstashTransactionResources(_opCtx, "insert");
-
- txnParticipant.commitUnpreparedTransaction(_opCtx);
-
- txnParticipant.stashTransactionResources(_opCtx);
- assertNoStartOpTime();
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- const auto& coll = autoColl.getCollection();
- assertDocumentAtTimestamp(coll, presentTs, BSONObj());
- assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
- assertDocumentAtTimestamp(coll, commitEntryTs, doc);
- assertDocumentAtTimestamp(coll, nullTs, doc);
-
- const auto commitFilter = BSON("ts" << commitEntryTs);
- assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(commitFilter, nullTs, true);
- }
- }
-};
-
-// Including this class in a test fixture forces transactions to use one oplog entry per operation
-// instead of packing them into as few oplog entries as fit. This allows testing of the timestamps
-// of multi-oplog-entry transactions.
-class MultiOplogScopedSettings {
-public:
- MultiOplogScopedSettings()
- : _prevPackingLimit(gMaxNumberOfTransactionOperationsInSingleOplogEntry) {
- gMaxNumberOfTransactionOperationsInSingleOplogEntry = 1;
- }
- ~MultiOplogScopedSettings() {
- gMaxNumberOfTransactionOperationsInSingleOplogEntry = _prevPackingLimit;
- }
-
-private:
- int _prevPackingLimit;
-};
-
-class MultiOplogEntryTransaction : public MultiDocumentTransactionTest {
-public:
- MultiOplogEntryTransaction() : MultiDocumentTransactionTest("multiOplogEntryTransaction") {
- const auto currentTime = _clock->getTime();
- const auto clusterTime = currentTime.clusterTime();
- firstOplogEntryTs = clusterTime.addTicks(1).asTimestamp();
- commitEntryTs = clusterTime.addTicks(2).asTimestamp();
- }
-
- void run() {
- auto txnParticipant = TransactionParticipant::get(_opCtx);
- ASSERT(txnParticipant);
- logTimestamps();
-
- txnParticipant.unstashTransactionResources(_opCtx, "insert");
-
- const BSONObj doc2 = BSON("_id" << 2 << "TestValue" << 2);
- const BSONObj doc2Key = BSON("_id" << 2);
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- insertDocument(autoColl.getCollection(), InsertStatement(doc2));
- }
- txnParticipant.commitUnpreparedTransaction(_opCtx);
-
- txnParticipant.stashTransactionResources(_opCtx);
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- const BSONObj query1 = BSON("_id" << 1);
- const BSONObj query2 = BSON("_id" << 2);
- const auto& coll = autoColl.getCollection();
-
- // Collection should be empty until commit, at which point both documents
- // should show up.
- assertDocumentAtTimestamp(coll, presentTs, BSONObj());
- assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
- assertDocumentAtTimestamp(coll, firstOplogEntryTs, BSONObj());
- assertFilteredDocumentAtTimestamp(coll, query1, commitEntryTs, doc);
- assertFilteredDocumentAtTimestamp(coll, query2, commitEntryTs, doc2);
- assertFilteredDocumentAtTimestamp(coll, query1, nullTs, doc);
- assertFilteredDocumentAtTimestamp(coll, query2, nullTs, doc2);
-
- // Implicit commit oplog entry should exist at commitEntryTs.
- const auto commitFilter =
- BSON("ts" << commitEntryTs << "o"
- << BSON("applyOps"
- << BSON_ARRAY(BSON("op"
- << "i"
- << "ns" << nss.ns() << "ui" << coll->uuid()
- << "o" << doc2 << "o2" << doc2Key))
- << "count" << 2));
- assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, firstOplogEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(commitFilter, nullTs, true);
-
- // Check that the oldestActiveTxnTimestamp properly accounts for in-progress
- // transactions.
- assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
- assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
- assertOldestActiveTxnTimestampEquals(firstOplogEntryTs, firstOplogEntryTs);
- assertOldestActiveTxnTimestampEquals(boost::none, commitEntryTs);
- assertOldestActiveTxnTimestampEquals(boost::none, nullTs);
-
- // first oplog entry should exist at firstOplogEntryTs and after it.
- const auto firstOplogEntryFilter =
- BSON("ts" << firstOplogEntryTs << "o"
- << BSON("applyOps"
- << BSON_ARRAY(BSON("op"
- << "i"
- << "ns" << nss.ns() << "ui" << coll->uuid()
- << "o" << doc << "o2" << docKey))
- << "partialTxn" << true));
- assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, firstOplogEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, commitEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, nullTs, true);
-
- // Session state should go to inProgress at firstOplogEntryTs, then to committed
- // at commitEntryTs
- getSessionTxnInfoAtTimestamp(presentTs, false);
- getSessionTxnInfoAtTimestamp(beforeTxnTs, false);
- auto sessionInfo = getSessionTxnInfoAtTimestamp(firstOplogEntryTs, true);
- ASSERT_EQ(sessionInfo["state"].String(), "inProgress");
- ASSERT_EQ(sessionInfo["lastWriteOpTime"]["ts"].timestamp(), firstOplogEntryTs);
- ASSERT_EQ(sessionInfo["startOpTime"]["ts"].timestamp(), firstOplogEntryTs);
-
- sessionInfo = getSessionTxnInfoAtTimestamp(commitEntryTs, true);
- ASSERT_EQ(sessionInfo["state"].String(), "committed");
- ASSERT_EQ(sessionInfo["lastWriteOpTime"]["ts"].timestamp(), commitEntryTs);
- ASSERT_FALSE(sessionInfo.hasField("startOpTime"));
-
- sessionInfo = getSessionTxnInfoAtTimestamp(nullTs, true);
- ASSERT_EQ(sessionInfo["state"].String(), "committed");
- ASSERT_EQ(sessionInfo["lastWriteOpTime"]["ts"].timestamp(), commitEntryTs);
- ASSERT_FALSE(sessionInfo.hasField("startOpTime"));
- }
- }
-
-protected:
- Timestamp firstOplogEntryTs;
-
-private:
- MultiOplogScopedSettings multiOplogSettings;
-};
-
-class CommitPreparedMultiOplogEntryTransaction : public MultiDocumentTransactionTest {
-public:
- CommitPreparedMultiOplogEntryTransaction()
- : MultiDocumentTransactionTest("preparedMultiOplogEntryTransaction") {
- const auto currentTime = _clock->getTime();
- const auto clusterTime = currentTime.clusterTime();
- firstOplogEntryTs = clusterTime.addTicks(1).asTimestamp();
- prepareEntryTs = clusterTime.addTicks(2).asTimestamp();
- commitTs = clusterTime.addTicks(3).asTimestamp();
- commitEntryTs = clusterTime.addTicks(4).asTimestamp();
- }
-
- void run() {
- auto txnParticipant = TransactionParticipant::get(_opCtx);
- ASSERT(txnParticipant);
- LOGV2(22511, "PrepareTS: {prepareEntryTs}", "prepareEntryTs"_attr = prepareEntryTs);
- logTimestamps();
-
- const auto prepareFilter = BSON("ts" << prepareEntryTs);
- const auto commitFilter = BSON("ts" << commitEntryTs);
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
- const auto& coll = autoColl.getCollection();
- assertDocumentAtTimestamp(coll, presentTs, BSONObj());
- assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
- assertDocumentAtTimestamp(coll, firstOplogEntryTs, BSONObj());
- assertDocumentAtTimestamp(coll, prepareEntryTs, BSONObj());
- assertDocumentAtTimestamp(coll, commitEntryTs, BSONObj());
-
- assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, firstOplogEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, commitEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, nullTs, false);
-
- assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, prepareEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, nullTs, false);
-
- assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
- assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
- assertOldestActiveTxnTimestampEquals(boost::none, prepareEntryTs);
- assertOldestActiveTxnTimestampEquals(boost::none, commitEntryTs);
- assertOldestActiveTxnTimestampEquals(boost::none, nullTs);
- }
- txnParticipant.unstashTransactionResources(_opCtx, "insert");
- const BSONObj doc2 = BSON("_id" << 2 << "TestValue" << 2);
- const BSONObj doc2Key = BSON("_id" << 2);
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- insertDocument(autoColl.getCollection(), InsertStatement(doc2));
- }
- txnParticipant.prepareTransaction(_opCtx, {});
-
- const BSONObj query1 = BSON("_id" << 1);
- const BSONObj query2 = BSON("_id" << 2);
-
- txnParticipant.stashTransactionResources(_opCtx);
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
- const auto& coll = autoColl.getCollection();
- assertDocumentAtTimestamp(coll, presentTs, BSONObj());
- assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
- assertDocumentAtTimestamp(coll, firstOplogEntryTs, BSONObj());
-
- {
- IgnorePrepareBlock ignorePrepare(_opCtx);
- // Perform the following while ignoring prepare conflicts. These calls would
- // otherwise wait forever until the prepared transaction committed or aborted.
- assertDocumentAtTimestamp(coll, prepareEntryTs, BSONObj());
- assertDocumentAtTimestamp(coll, commitEntryTs, BSONObj());
- assertDocumentAtTimestamp(coll, nullTs, BSONObj());
- }
-
- assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, firstOplogEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, commitEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, nullTs, true);
-
- // We haven't committed the prepared transaction
- assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, firstOplogEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, prepareEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, nullTs, false);
-
- assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
- assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
- assertOldestActiveTxnTimestampEquals(firstOplogEntryTs, firstOplogEntryTs);
- assertOldestActiveTxnTimestampEquals(firstOplogEntryTs, prepareEntryTs);
- // The transaction has not been committed yet and is still considered active.
- assertOldestActiveTxnTimestampEquals(firstOplogEntryTs, commitEntryTs);
- assertOldestActiveTxnTimestampEquals(firstOplogEntryTs, nullTs);
- }
-
- txnParticipant.unstashTransactionResources(_opCtx, "commitTransaction");
-
- {
- FailPointEnableBlock failPointBlock("skipCommitTxnCheckPrepareMajorityCommitted");
- txnParticipant.commitPreparedTransaction(_opCtx, commitTs, {});
- }
-
- txnParticipant.stashTransactionResources(_opCtx);
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- const auto& coll = autoColl.getCollection();
- assertDocumentAtTimestamp(coll, presentTs, BSONObj());
- assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
- assertDocumentAtTimestamp(coll, firstOplogEntryTs, BSONObj());
- assertDocumentAtTimestamp(coll, prepareEntryTs, BSONObj());
- assertFilteredDocumentAtTimestamp(coll, query1, commitEntryTs, doc);
- assertFilteredDocumentAtTimestamp(coll, query2, commitEntryTs, doc2);
- assertFilteredDocumentAtTimestamp(coll, query1, nullTs, doc);
- assertFilteredDocumentAtTimestamp(coll, query2, nullTs, doc2);
-
- // The prepare oplog entry should exist at prepareEntryTs and onwards.
- assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, commitEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, nullTs, true);
-
- // The commit oplog entry should exist at commitEntryTs and onwards.
- assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, prepareEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(commitFilter, nullTs, true);
-
- // The first oplog entry should exist at firstOplogEntryTs and onwards.
- const auto firstOplogEntryFilter =
- BSON("ts" << firstOplogEntryTs << "o"
- << BSON("applyOps"
- << BSON_ARRAY(BSON("op"
- << "i"
- << "ns" << nss.ns() << "ui" << coll->uuid()
- << "o" << doc << "o2" << docKey))
- << "partialTxn" << true));
- assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, firstOplogEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, prepareEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, commitEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(firstOplogEntryFilter, nullTs, true);
- // The prepare oplog entry should exist at prepareEntryTs and onwards.
- const auto prepareOplogEntryFilter =
- BSON("ts" << prepareEntryTs << "o"
- << BSON("applyOps"
- << BSON_ARRAY(BSON("op"
- << "i"
- << "ns" << nss.ns() << "ui" << coll->uuid()
- << "o" << doc2 << "o2" << doc2Key))
- << "prepare" << true << "count" << 2));
- assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, firstOplogEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, prepareEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, commitEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, nullTs, true);
-
- assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
- assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
- assertOldestActiveTxnTimestampEquals(firstOplogEntryTs, firstOplogEntryTs);
- assertOldestActiveTxnTimestampEquals(firstOplogEntryTs, prepareEntryTs);
- // The transaction is no longer considered active after being committed.
- assertOldestActiveTxnTimestampEquals(boost::none, commitEntryTs);
- assertOldestActiveTxnTimestampEquals(boost::none, nullTs);
-
- // The session state should go to inProgress at firstOplogEntryTs, then to prepared at
- // prepareEntryTs, and then finally to committed at commitEntryTs.
- auto sessionInfo = getSessionTxnInfoAtTimestamp(firstOplogEntryTs, true);
- ASSERT_EQ(sessionInfo["state"].String(), "inProgress");
- ASSERT_EQ(sessionInfo["lastWriteOpTime"]["ts"].timestamp(), firstOplogEntryTs);
- ASSERT_EQ(sessionInfo["startOpTime"]["ts"].timestamp(), firstOplogEntryTs);
-
- sessionInfo = getSessionTxnInfoAtTimestamp(prepareEntryTs, true);
- ASSERT_EQ(sessionInfo["state"].String(), "prepared");
- ASSERT_EQ(sessionInfo["lastWriteOpTime"]["ts"].timestamp(), prepareEntryTs);
- ASSERT_EQ(sessionInfo["startOpTime"]["ts"].timestamp(), firstOplogEntryTs);
-
- sessionInfo = getSessionTxnInfoAtTimestamp(nullTs, true);
- ASSERT_EQ(sessionInfo["state"].String(), "committed");
- ASSERT_EQ(sessionInfo["lastWriteOpTime"]["ts"].timestamp(), commitEntryTs);
- ASSERT_FALSE(sessionInfo.hasField("startOpTime"));
- }
- }
-
-protected:
- Timestamp firstOplogEntryTs, secondOplogEntryTs, prepareEntryTs, commitTs;
-
-private:
- MultiOplogScopedSettings multiOplogSettings;
-};
-
-class AbortPreparedMultiOplogEntryTransaction : public MultiDocumentTransactionTest {
-public:
- AbortPreparedMultiOplogEntryTransaction()
- : MultiDocumentTransactionTest("preparedMultiOplogEntryTransaction") {
- const auto currentTime = _clock->getTime();
- const auto clusterTime = currentTime.clusterTime();
- prepareEntryTs = clusterTime.addTicks(1).asTimestamp();
- abortEntryTs = clusterTime.addTicks(2).asTimestamp();
- }
-
- void run() {
- auto txnParticipant = TransactionParticipant::get(_opCtx);
- ASSERT(txnParticipant);
- LOGV2(22512, "PrepareTS: {prepareEntryTs}", "prepareEntryTs"_attr = prepareEntryTs);
- LOGV2(22513, "AbortTS: {abortEntryTs}", "abortEntryTs"_attr = abortEntryTs);
-
- const auto prepareFilter = BSON("ts" << prepareEntryTs);
- const auto abortFilter = BSON("ts" << abortEntryTs);
- {
- assertOplogDocumentExistsAtTimestamp(abortFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, prepareEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, abortEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, nullTs, false);
- }
- txnParticipant.unstashTransactionResources(_opCtx, "insert");
-
- txnParticipant.prepareTransaction(_opCtx, {});
-
- txnParticipant.stashTransactionResources(_opCtx);
- {
- assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, abortEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, nullTs, true);
-
- assertOplogDocumentExistsAtTimestamp(abortFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, firstOplogEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, prepareEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, abortEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, nullTs, false);
- }
-
- txnParticipant.unstashTransactionResources(_opCtx, "abortTransaction");
-
- txnParticipant.abortTransaction(_opCtx);
-
- txnParticipant.stashTransactionResources(_opCtx);
- {
- // The prepare oplog entry should exist at prepareEntryTs and onwards.
- assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, abortEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, nullTs, true);
-
- // The abort oplog entry should exist at abortEntryTs and onwards.
- assertOplogDocumentExistsAtTimestamp(abortFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, prepareEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, abortEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(abortFilter, nullTs, true);
-
- UUID ui = UUID::gen();
- {
- AutoGetCollection coll(_opCtx, nss, LockMode::MODE_IX);
- ASSERT(coll);
- ui = coll->uuid();
- }
-
- // The prepare oplog entry should exist at firstOplogEntryTs and onwards.
- const auto prepareOplogEntryFilter =
- BSON("ts" << prepareEntryTs << "o"
- << BSON("applyOps" << BSON_ARRAY(BSON("op"
- << "i"
- << "ns" << nss.ns() << "ui" << ui
- << "o" << doc << "o2" << docKey))
- << "prepare" << true));
- assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, prepareEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, abortEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareOplogEntryFilter, nullTs, true);
-
- assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
- assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
- assertOldestActiveTxnTimestampEquals(boost::none, abortEntryTs);
- assertOldestActiveTxnTimestampEquals(boost::none, nullTs);
-
- // The session state should be "aborted" at abortEntryTs.
- auto sessionInfo = getSessionTxnInfoAtTimestamp(abortEntryTs, true);
- ASSERT_EQ(sessionInfo["state"].String(), "aborted");
- ASSERT_EQ(sessionInfo["lastWriteOpTime"]["ts"].timestamp(), abortEntryTs);
- ASSERT_FALSE(sessionInfo.hasField("startOpTime"));
- }
- }
-
-protected:
- Timestamp firstOplogEntryTs, secondOplogEntryTs, prepareEntryTs, abortEntryTs;
-
-private:
- MultiOplogScopedSettings multiOplogSettings;
-};
-
-class PreparedMultiDocumentTransaction : public MultiDocumentTransactionTest {
-public:
- PreparedMultiDocumentTransaction()
- : MultiDocumentTransactionTest("preparedMultiDocumentTransaction") {}
-
- void run() {
- auto txnParticipant = TransactionParticipant::get(_opCtx);
- ASSERT(txnParticipant);
-
- const auto currentTime = _clock->getTime();
- const auto clusterTime = currentTime.clusterTime();
- const auto prepareTs = clusterTime.addTicks(1).asTimestamp();
- const auto commitTs = clusterTime.addTicks(2).asTimestamp();
- commitEntryTs = clusterTime.addTicks(3).asTimestamp();
- LOGV2(22514, "Prepare TS: {prepareTs}", "prepareTs"_attr = prepareTs);
- logTimestamps();
-
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
- const auto& coll = autoColl.getCollection();
- assertDocumentAtTimestamp(coll, prepareTs, BSONObj());
- assertDocumentAtTimestamp(coll, commitEntryTs, BSONObj());
-
- const auto prepareFilter = BSON("ts" << prepareTs);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, commitEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, nullTs, false);
-
- const auto commitFilter = BSON("ts" << commitEntryTs);
- assertOplogDocumentExistsAtTimestamp(commitFilter, prepareTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, false);
- }
- txnParticipant.unstashTransactionResources(_opCtx, "insert");
-
- txnParticipant.prepareTransaction(_opCtx, {});
-
- txnParticipant.stashTransactionResources(_opCtx);
- assertHasStartOpTime();
- {
- const auto prepareFilter = BSON("ts" << prepareTs);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, commitEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, nullTs, true);
-
- const auto commitFilter = BSON("ts" << commitEntryTs);
- assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, prepareTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, nullTs, false);
-
- assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
- assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
- assertOldestActiveTxnTimestampEquals(prepareTs, prepareTs);
- assertOldestActiveTxnTimestampEquals(prepareTs, nullTs);
- assertOldestActiveTxnTimestampEquals(prepareTs, commitEntryTs);
- }
- txnParticipant.unstashTransactionResources(_opCtx, "commitTransaction");
-
- {
- FailPointEnableBlock failPointBlock("skipCommitTxnCheckPrepareMajorityCommitted");
- txnParticipant.commitPreparedTransaction(_opCtx, commitTs, {});
- }
-
- assertNoStartOpTime();
-
- txnParticipant.stashTransactionResources(_opCtx);
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- const auto& coll = autoColl.getCollection();
- assertDocumentAtTimestamp(coll, presentTs, BSONObj());
- assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
- assertDocumentAtTimestamp(coll, prepareTs, BSONObj());
- assertDocumentAtTimestamp(coll, commitEntryTs, doc);
- assertDocumentAtTimestamp(coll, nullTs, doc);
-
- const auto prepareFilter = BSON("ts" << prepareTs);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, commitEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, nullTs, true);
-
- const auto commitFilter = BSON("ts" << commitEntryTs);
- assertOplogDocumentExistsAtTimestamp(commitFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, prepareTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, commitEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(commitFilter, nullTs, true);
-
- assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
- assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
- assertOldestActiveTxnTimestampEquals(prepareTs, prepareTs);
- assertOldestActiveTxnTimestampEquals(boost::none, commitEntryTs);
- assertOldestActiveTxnTimestampEquals(boost::none, nullTs);
- }
- }
-};
-
-class AbortedPreparedMultiDocumentTransaction : public MultiDocumentTransactionTest {
-public:
- AbortedPreparedMultiDocumentTransaction()
- : MultiDocumentTransactionTest("abortedPreparedMultiDocumentTransaction") {}
-
- void run() {
- auto txnParticipant = TransactionParticipant::get(_opCtx);
- ASSERT(txnParticipant);
-
- const auto currentTime = _clock->getTime();
- const auto clusterTime = currentTime.clusterTime();
- const auto prepareTs = clusterTime.addTicks(1).asTimestamp();
- const auto abortEntryTs = clusterTime.addTicks(2).asTimestamp();
- LOGV2(22515, "Prepare TS: {prepareTs}", "prepareTs"_attr = prepareTs);
- logTimestamps();
-
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS);
- const auto& coll = autoColl.getCollection();
- assertDocumentAtTimestamp(coll, prepareTs, BSONObj());
- assertDocumentAtTimestamp(coll, abortEntryTs, BSONObj());
-
- const auto prepareFilter = BSON("ts" << prepareTs);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, abortEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, nullTs, false);
-
- const auto commitFilter = BSON("ts" << abortEntryTs);
- assertOplogDocumentExistsAtTimestamp(commitFilter, prepareTs, false);
- assertOplogDocumentExistsAtTimestamp(commitFilter, abortEntryTs, false);
- }
- txnParticipant.unstashTransactionResources(_opCtx, "insert");
-
- txnParticipant.prepareTransaction(_opCtx, {});
-
- txnParticipant.stashTransactionResources(_opCtx);
- assertHasStartOpTime();
- {
- const auto prepareFilter = BSON("ts" << prepareTs);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, abortEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, nullTs, true);
-
- const auto abortFilter = BSON("ts" << abortEntryTs);
- assertOplogDocumentExistsAtTimestamp(abortFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, prepareTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, abortEntryTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, nullTs, false);
-
- assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
- assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
- assertOldestActiveTxnTimestampEquals(prepareTs, prepareTs);
- assertOldestActiveTxnTimestampEquals(prepareTs, nullTs);
- assertOldestActiveTxnTimestampEquals(prepareTs, abortEntryTs);
- }
- txnParticipant.unstashTransactionResources(_opCtx, "abortTransaction");
-
- txnParticipant.abortTransaction(_opCtx);
- assertNoStartOpTime();
-
- txnParticipant.stashTransactionResources(_opCtx);
- {
- AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IX);
- const auto& coll = autoColl.getCollection();
- assertDocumentAtTimestamp(coll, presentTs, BSONObj());
- assertDocumentAtTimestamp(coll, beforeTxnTs, BSONObj());
- assertDocumentAtTimestamp(coll, prepareTs, BSONObj());
- assertDocumentAtTimestamp(coll, abortEntryTs, BSONObj());
- assertDocumentAtTimestamp(coll, nullTs, BSONObj());
-
- const auto prepareFilter = BSON("ts" << prepareTs);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, prepareTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, abortEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(prepareFilter, nullTs, true);
-
- const auto abortFilter = BSON("ts" << abortEntryTs);
- assertOplogDocumentExistsAtTimestamp(abortFilter, presentTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, beforeTxnTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, prepareTs, false);
- assertOplogDocumentExistsAtTimestamp(abortFilter, abortEntryTs, true);
- assertOplogDocumentExistsAtTimestamp(abortFilter, nullTs, true);
-
- assertOldestActiveTxnTimestampEquals(boost::none, presentTs);
- assertOldestActiveTxnTimestampEquals(boost::none, beforeTxnTs);
- assertOldestActiveTxnTimestampEquals(prepareTs, prepareTs);
- assertOldestActiveTxnTimestampEquals(boost::none, abortEntryTs);
- assertOldestActiveTxnTimestampEquals(boost::none, nullTs);
- }
- }
-};
-
// Old-style suite that aggregates every storage-timestamp test case in this file.
// Each case is registered via addIf(), which defers the "can this engine run the
// suite?" decision to test-execution time instead of static-initialization time.
class AllStorageTimestampTests : public unittest::OldStyleSuiteSpecification {
public:
    AllStorageTimestampTests() : unittest::OldStyleSuiteSpecification("StorageTimestampTests") {}

    // Must be evaluated at test run() time, not static-init time.
    // Returns true when the whole suite should be skipped.
    static bool shouldSkip() {
        // Only run on storage engines that support snapshot reads.
        // NOTE(review): also requires enableMajorityReadConcern; the log text below
        // says "timestamp writes", which is narrower than what is actually checked.
        auto storageEngine = cc().getServiceContext()->getStorageEngine();
        if (!storageEngine->supportsReadConcernSnapshot() ||
            !mongo::serverGlobalParams.enableMajorityReadConcern) {
            LOGV2(22516,
                  "Skipping this test suite because storage engine {storageGlobalParams_engine} "
                  "does not support timestamp writes.",
                  "storageGlobalParams_engine"_attr = storageGlobalParams.engine);
            return true;
        }
        return false;
    }

    // Registers test class T by name; the callback re-checks shouldSkip() each
    // run so the skip decision reflects the live storage-engine configuration.
    template <typename T>
    void addIf() {
        addNameCallback(nameForTestClass<T>(), [] {
            if (!shouldSkip())
                T().run();
        });
    }

    // Registration list. Order is preserved as written; template arguments on
    // the parameterized cases select the SimulatePrimary variant.
    void setupTests() {
        addIf<SecondaryInsertTimes>();
        addIf<SecondaryArrayInsertTimes>();
        addIf<SecondaryDeleteTimes>();
        addIf<SecondaryUpdateTimes>();
        addIf<SecondaryInsertToUpsert>();
        addIf<SecondaryAtomicApplyOps>();
        addIf<SecondaryAtomicApplyOpsWCEToNonAtomic>();
        addIf<SecondaryCreateCollection>();
        addIf<SecondaryCreateTwoCollections>();
        addIf<SecondaryCreateCollectionBetweenInserts>();
        addIf<PrimaryCreateCollectionInApplyOps>();
        addIf<SecondarySetIndexMultikeyOnInsert>();
        addIf<SecondarySetWildcardIndexMultikeyOnInsert>();
        addIf<SecondarySetWildcardIndexMultikeyOnUpdate>();
        addIf<InitialSyncSetIndexMultikeyOnInsert>();
        addIf<PrimarySetIndexMultikeyOnInsert>();
        addIf<PrimarySetIndexMultikeyOnInsertUnreplicated>();
        addIf<PrimarySetsMultikeyInsideMultiDocumentTransaction>();
        addIf<InitializeMinValid>();
        addIf<SetMinValidInitialSyncFlag>();
        addIf<SetMinValidAppliedThrough>();
        // KVDropDatabase<SimulatePrimary>
        addIf<KVDropDatabase<false>>();
        addIf<KVDropDatabase<true>>();
        // TimestampIndexBuilds<SimulatePrimary>
        addIf<TimestampIndexBuilds<false>>();
        addIf<TimestampIndexBuilds<true>>();
        addIf<TimestampMultiIndexBuilds>();
        addIf<TimestampMultiIndexBuildsDuringRename>();
        addIf<TimestampAbortIndexBuild>();
        addIf<TimestampIndexDropsWildcard>();
        addIf<TimestampIndexDropsListed>();
        addIf<TimestampIndexOplogApplicationOnPrimary>();
        addIf<SecondaryReadsDuringBatchApplicationAreAllowed>();
        addIf<ViewCreationSeparateTransaction>();
        addIf<CreateCollectionWithSystemIndex>();
        addIf<MultiDocumentTransaction>();
        addIf<MultiOplogEntryTransaction>();
        addIf<CommitPreparedMultiOplogEntryTransaction>();
        addIf<AbortPreparedMultiOplogEntryTransaction>();
        addIf<PreparedMultiDocumentTransaction>();
        addIf<AbortedPreparedMultiDocumentTransaction>();
        addIf<IndexBuildsResolveErrorsDuringStateChangeToPrimary>();
        addIf<RetryableFindAndModifyUpdate>();
        addIf<RetryableFindAndModifyUpdateWithDamages>();
        addIf<RetryableFindAndModifyDelete>();
    }
};
-
// Static-initialization hook: constructs the suite above and registers it with
// the global unit-test registry at program startup.
unittest::OldStyleSuiteInitializer<AllStorageTimestampTests> allStorageTimestampTests;
} // namespace mongo