diff options
author | Daniel Gottlieb <daniel.gottlieb@mongodb.com> | 2018-05-14 11:09:50 -0400 |
---|---|---|
committer | Daniel Gottlieb <daniel.gottlieb@mongodb.com> | 2018-05-14 11:09:50 -0400 |
commit | 8e27c2b49ba17fe9b02695efb29a6322b56c2f23 (patch) | |
tree | d8b0682eee38a4655bf8d61b00143df6a1153a4d /src/mongo/dbtests | |
parent | 074c9725f1e6ad9b005ab08acc1aedf34e537d69 (diff) | |
download | mongo-8e27c2b49ba17fe9b02695efb29a6322b56c2f23.tar.gz |
SERVER-34896: Move timestamping responsibility on index completion to callers.
Previously, `MultiIndexBlockImpl` would use member state as a proxy for whether
an index build originated from a `createIndexes` command. This information was
used to determine if the completion of the index needed to be explicitly
timestamped, or whether an oplog entry would timestamp the write.
However, nodes in a primary state can build indexes during primary "catch-up"
or "drain". Those index builds originated from an oplog entry and thus will not
generate one on completion. These index builds need to be explicitly
timestamped.
Diffstat (limited to 'src/mongo/dbtests')
-rw-r--r-- | src/mongo/dbtests/storage_timestamp_tests.cpp | 285 |
1 files changed, 206 insertions, 79 deletions
diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp index 1c05ac712e9..bc26bab34a2 100644 --- a/src/mongo/dbtests/storage_timestamp_tests.cpp +++ b/src/mongo/dbtests/storage_timestamp_tests.cpp @@ -73,8 +73,36 @@ namespace mongo { +namespace { +/** + * RAII type for operating at a timestamp. Will remove any timestamping when the object destructs. + */ +class OneOffRead { +public: + OneOffRead(OperationContext* opCtx, const Timestamp& ts) : _opCtx(opCtx) { + _opCtx->recoveryUnit()->abandonSnapshot(); + ASSERT_OK(_opCtx->recoveryUnit()->setPointInTimeReadTimestamp(ts)); + } + + ~OneOffRead() { + _opCtx->recoveryUnit()->abandonSnapshot(); + ASSERT_OK(_opCtx->recoveryUnit()->setPointInTimeReadTimestamp(Timestamp::min())); + } + +private: + OperationContext* _opCtx; +}; +} + const auto kIndexVersion = IndexDescriptor::IndexVersion::kV2; +BSONCollectionCatalogEntry::IndexMetaData getIndexMetaData( + const BSONCollectionCatalogEntry::MetaData& collMetaData, StringData indexName) { + const auto idxOffset = collMetaData.findIndexOffset(indexName); + invariant(idxOffset > -1); + return collMetaData.indexes[idxOffset]; +} + class StorageTimestampTest { public: ServiceContext::UniqueOperationContext _opCtxRaii = cc().makeOperationContext(); @@ -206,8 +234,17 @@ public: return optRecord.get().data.toBson(); } + BSONCollectionCatalogEntry::MetaData getMetaDataAtTime(KVCatalog* kvCatalog, + NamespaceString ns, + const Timestamp& ts) { + OneOffRead oor(_opCtx, ts); + return kvCatalog->getMetaData(_opCtx, ns.ns()); + } + StatusWith<BSONObj> doAtomicApplyOps(const std::string& dbName, const std::list<BSONObj>& applyOpsList) { + OneOffRead oor(_opCtx, Timestamp::min()); + BSONObjBuilder result; Status status = applyOps(_opCtx, dbName, @@ -224,6 +261,8 @@ public: // Creates a dummy command operation to persuade `applyOps` to be non-atomic. StatusWith<BSONObj> doNonAtomicApplyOps(const std::string& dbName, const std::list<BSONObj>& applyOpsList) { + OneOffRead oor(_opCtx, Timestamp::min()); + BSONObjBuilder result; Status status = applyOps(_opCtx, dbName, @@ -240,9 +279,8 @@ public: void assertMinValidDocumentAtTimestamp(Collection* coll, const Timestamp& ts, const repl::MinValidDocument& expectedDoc) { - auto recoveryUnit = _opCtx->recoveryUnit(); - recoveryUnit->abandonSnapshot(); - ASSERT_OK(recoveryUnit->setPointInTimeReadTimestamp(ts)); + OneOffRead oor(_opCtx, ts); + auto doc = repl::MinValidDocument::parse(IDLParserErrorContext("MinValidDocument"), findOne(coll)); ASSERT_EQ(expectedDoc.getMinValidTimestamp(), doc.getMinValidTimestamp()) @@ -265,10 +303,7 @@ public: void assertDocumentAtTimestamp(Collection* coll, const Timestamp& ts, const BSONObj& expectedDoc) { - - auto recoveryUnit = _opCtx->recoveryUnit(); - recoveryUnit->abandonSnapshot(); - ASSERT_OK(recoveryUnit->setPointInTimeReadTimestamp(ts)); + OneOffRead oor(_opCtx, ts); if (expectedDoc.isEmpty()) { ASSERT_EQ(0, itCount(coll)) << "Should not find any documents in " << coll->ns() << " at ts: " << ts; @@ -290,24 +325,18 @@ public: * Asserts that the given collection is in (or not in) the KVCatalog's list of idents at the * provided timestamp. */ - void assertNamespaceInIdents(OperationContext* opCtx, - NamespaceString nss, - Timestamp ts, - bool shouldExpect) { + void assertNamespaceInIdents(NamespaceString nss, Timestamp ts, bool shouldExpect) { + OneOffRead oor(_opCtx, ts); KVCatalog* kvCatalog = static_cast<KVStorageEngine*>(_opCtx->getServiceContext()->getStorageEngine()) ->getCatalog(); - auto recoveryUnit = _opCtx->recoveryUnit(); - recoveryUnit->abandonSnapshot(); - ASSERT_OK(recoveryUnit->setPointInTimeReadTimestamp(ts)); - // getCollectionIdent() returns the ident for the given namespace in the KVCatalog. // getAllIdents() actually looks in the RecordStore for a list of all idents, and is thus // versioned by timestamp. These tests do not do any renames, so we can expect the // namespace to have a consistent ident across timestamps, if it exists. auto expectedIdent = kvCatalog->getCollectionIdent(nss.ns()); - auto idents = kvCatalog->getAllIdents(opCtx); + auto idents = kvCatalog->getAllIdents(_opCtx); auto found = std::find(idents.begin(), idents.end(), expectedIdent); if (shouldExpect) { @@ -319,6 +348,8 @@ public: } std::string getNewIndexIdent(KVCatalog* kvCatalog, std::vector<std::string>& origIdents) { + OneOffRead oor(_opCtx, Timestamp::min()); + // Find the collection and index ident by performing a set difference on the original // idents and the current idents. std::vector<std::string> identsWithColl = kvCatalog->getAllIdents(_opCtx); @@ -331,7 +362,7 @@ public: origIdents.end(), std::back_inserter(collAndIdxIdents)); - ASSERT(collAndIdxIdents.size() == 1); + ASSERT(collAndIdxIdents.size() == 1) << "Num idents: " << collAndIdxIdents.size(); return collAndIdxIdents[0]; } @@ -366,9 +397,8 @@ public: const std::string& collIdent, const std::string& indexIdent, Timestamp timestamp) { - auto recoveryUnit = _opCtx->recoveryUnit(); - recoveryUnit->abandonSnapshot(); - ASSERT_OK(_opCtx->recoveryUnit()->setPointInTimeReadTimestamp(timestamp)); + OneOffRead oor(_opCtx, timestamp); + auto allIdents = kvCatalog->getAllIdents(_opCtx); if (collIdent.size() > 0) { // Index build test does not pass in a collection ident. @@ -385,9 +415,7 @@ public: const std::string& collIdent, const std::string& indexIdent, Timestamp timestamp) { - auto recoveryUnit = _opCtx->recoveryUnit(); - recoveryUnit->abandonSnapshot(); - ASSERT_OK(_opCtx->recoveryUnit()->setPointInTimeReadTimestamp(timestamp)); + OneOffRead oor(_opCtx, timestamp); auto allIdents = kvCatalog->getAllIdents(_opCtx); if (collIdent.size() > 0) { // Index build test does not pass in a collection ident. @@ -422,9 +450,7 @@ public: const MultikeyPaths& expectedMultikeyPaths) { auto catalog = collection->getCatalogEntry(); - auto recoveryUnit = _opCtx->recoveryUnit(); - recoveryUnit->abandonSnapshot(); - ASSERT_OK(recoveryUnit->setPointInTimeReadTimestamp(ts)); + OneOffRead oor(_opCtx, ts); MultikeyPaths actualMultikeyPaths; if (!shouldBeMultikey) { @@ -495,10 +521,8 @@ public: } for (std::uint32_t idx = 0; idx < docsToInsert; ++idx) { - auto recoveryUnit = _opCtx->recoveryUnit(); - recoveryUnit->abandonSnapshot(); - ASSERT_OK(recoveryUnit->setPointInTimeReadTimestamp( - firstInsertTime.addTicks(idx).asTimestamp())); + OneOffRead oor(_opCtx, firstInsertTime.addTicks(idx).asTimestamp()); + BSONObj result; ASSERT(Helpers::getLast(_opCtx, nss.ns().c_str(), result)) << " idx is " << idx; ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(result, BSON("_id" << idx))) @@ -578,10 +602,8 @@ public: for (std::uint32_t idx = 0; idx < docsToInsert; ++idx) { - auto recoveryUnit = _opCtx->recoveryUnit(); - recoveryUnit->abandonSnapshot(); - ASSERT_OK(recoveryUnit->setPointInTimeReadTimestamp( - firstInsertTime.addTicks(idx).asTimestamp())); + OneOffRead oor(_opCtx, firstInsertTime.addTicks(idx).asTimestamp()); + BSONObj result; ASSERT(Helpers::getLast(_opCtx, nss.ns().c_str(), result)) << " idx is " << idx; ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(result, BSON("_id" << idx))) @@ -645,10 +667,7 @@ public: for (std::int32_t num = 0; num <= docsToInsert; ++num) { // The first loop queries at `lastInsertTime` and should count all documents. Querying // at each successive tick counts one less document. - auto recoveryUnit = _opCtx->recoveryUnit(); - recoveryUnit->abandonSnapshot(); - ASSERT_OK(recoveryUnit->setPointInTimeReadTimestamp( - lastInsertTime.addTicks(num).asTimestamp())); + OneOffRead oor(_opCtx, lastInsertTime.addTicks(num).asTimestamp()); ASSERT_EQ(docsToInsert - num, itCount(autoColl.getCollection())); } } @@ -722,10 +741,7 @@ public: for (std::size_t idx = 0; idx < updates.size(); ++idx) { // Querying at each successive ticks after `insertTime` sees the document transform in // the series. - auto recoveryUnit = _opCtx->recoveryUnit(); - recoveryUnit->abandonSnapshot(); - ASSERT_OK(recoveryUnit->setPointInTimeReadTimestamp( - insertTime.addTicks(idx + 1).asTimestamp())); + OneOffRead oor(_opCtx, insertTime.addTicks(idx + 1).asTimestamp()); auto doc = findOne(autoColl.getCollection()); ASSERT_EQ(0, SimpleBSONObjComparator::kInstance.compare(doc, updates[idx].second)) @@ -951,10 +967,10 @@ public: { ASSERT(AutoGetCollectionForReadCommand(_opCtx, nss).getCollection()); } - assertNamespaceInIdents(_opCtx, nss, pastTs, false); - assertNamespaceInIdents(_opCtx, nss, presentTs, true); - assertNamespaceInIdents(_opCtx, nss, futureTs, true); - assertNamespaceInIdents(_opCtx, nss, nullTs, true); + assertNamespaceInIdents(nss, pastTs, false); + assertNamespaceInIdents(nss, presentTs, true); + assertNamespaceInIdents(nss, futureTs, true); + assertNamespaceInIdents(nss, nullTs, true); } }; @@ -1006,17 +1022,17 @@ public: { ASSERT(AutoGetCollectionForReadCommand(_opCtx, nss1).getCollection()); } { ASSERT(AutoGetCollectionForReadCommand(_opCtx, nss2).getCollection()); } - assertNamespaceInIdents(_opCtx, nss1, pastTs, false); - assertNamespaceInIdents(_opCtx, nss1, presentTs, true); - assertNamespaceInIdents(_opCtx, nss1, futureTs, true); - assertNamespaceInIdents(_opCtx, nss1, dummyTs, true); - assertNamespaceInIdents(_opCtx, nss1, nullTs, true); - - assertNamespaceInIdents(_opCtx, nss2, pastTs, false); - assertNamespaceInIdents(_opCtx, nss2, presentTs, false); - assertNamespaceInIdents(_opCtx, nss2, futureTs, true); - assertNamespaceInIdents(_opCtx, nss2, dummyTs, true); - assertNamespaceInIdents(_opCtx, nss2, nullTs, true); + assertNamespaceInIdents(nss1, pastTs, false); + assertNamespaceInIdents(nss1, presentTs, true); + assertNamespaceInIdents(nss1, futureTs, true); + assertNamespaceInIdents(nss1, dummyTs, true); + assertNamespaceInIdents(nss1, nullTs, true); + + assertNamespaceInIdents(nss2, pastTs, false); + assertNamespaceInIdents(nss2, presentTs, false); + assertNamespaceInIdents(nss2, futureTs, true); + assertNamespaceInIdents(nss2, dummyTs, true); + assertNamespaceInIdents(nss2, nullTs, true); } }; @@ -1099,12 +1115,12 @@ public: assertDocumentAtTimestamp(coll1, dummyTs, doc1); assertDocumentAtTimestamp(coll1, nullTs, doc1); - assertNamespaceInIdents(_opCtx, nss2, pastTs, false); - assertNamespaceInIdents(_opCtx, nss2, presentTs, false); - assertNamespaceInIdents(_opCtx, nss2, futureTs, true); - assertNamespaceInIdents(_opCtx, nss2, insert2Ts, true); - assertNamespaceInIdents(_opCtx, nss2, dummyTs, true); - assertNamespaceInIdents(_opCtx, nss2, nullTs, true); + assertNamespaceInIdents(nss2, pastTs, false); + assertNamespaceInIdents(nss2, presentTs, false); + assertNamespaceInIdents(nss2, futureTs, true); + assertNamespaceInIdents(nss2, insert2Ts, true); + assertNamespaceInIdents(nss2, dummyTs, true); + assertNamespaceInIdents(nss2, nullTs, true); assertDocumentAtTimestamp(coll2, pastTs, BSONObj()); assertDocumentAtTimestamp(coll2, presentTs, BSONObj()); @@ -1156,10 +1172,10 @@ public: ASSERT_EQ(op.getNamespace().ns(), nss.getCommandNS().ns()) << op.toBSON(); ASSERT_BSONOBJ_EQ(op.getObject(), BSON("create" << nss.coll())); - assertNamespaceInIdents(_opCtx, nss, pastTs, false); - assertNamespaceInIdents(_opCtx, nss, presentTs, false); - assertNamespaceInIdents(_opCtx, nss, futureTs, true); - assertNamespaceInIdents(_opCtx, nss, nullTs, true); + assertNamespaceInIdents(nss, pastTs, false); + assertNamespaceInIdents(nss, presentTs, false); + assertNamespaceInIdents(nss, futureTs, true); + assertNamespaceInIdents(nss, nullTs, true); } }; @@ -1771,14 +1787,18 @@ public: { WriteUnitOfWork wuow(_opCtx); - // Primaries will perform no timestamping in the indexer's commit. Secondaries will - // look at the logical clock. + // All callers of `MultiIndexBlock::commit` are responsible for timestamping index + // completion. Primaries write an oplog entry. Secondaries explicitly set a + // timestamp. indexer.commit(); if (SimulatePrimary) { // The op observer is not called from the index builder, but rather the // `createIndexes` command. _opCtx->getServiceContext()->getOpObserver()->onCreateIndex( _opCtx, nss, autoColl.getCollection()->uuid(), indexInfoObj, false); + } else { + ASSERT_OK( + _opCtx->recoveryUnit()->setTimestamp(_clock->getClusterTime().asTimestamp())); } wuow.commit(); } @@ -1791,23 +1811,19 @@ public: // Assert that the index entry exists after init and `ready: false`. assertIdentsExistAtTimestamp(kvCatalog, "", indexIdent, afterIndexInit.asTimestamp()); { - _opCtx->recoveryUnit()->abandonSnapshot(); - ASSERT_OK( - _opCtx->recoveryUnit()->setPointInTimeReadTimestamp(afterIndexInit.asTimestamp())); - auto collMetaData = kvCatalog->getMetaData(_opCtx, nss.ns()); - auto indexMetaData = collMetaData.indexes[collMetaData.findIndexOffset("a_1")]; - ASSERT_FALSE(indexMetaData.ready); + ASSERT_FALSE(getIndexMetaData( + getMetaDataAtTime(kvCatalog, nss, afterIndexInit.asTimestamp()), "a_1") + .ready); } // After the build completes, assert that the index is `ready: true` and multikey. assertIdentsExistAtTimestamp(kvCatalog, "", indexIdent, afterIndexBuild); { - _opCtx->recoveryUnit()->abandonSnapshot(); - ASSERT_OK(_opCtx->recoveryUnit()->setPointInTimeReadTimestamp(afterIndexBuild)); - auto collMetaData = kvCatalog->getMetaData(_opCtx, nss.ns()); - auto indexMetaData = collMetaData.indexes[collMetaData.findIndexOffset("a_1")]; + auto indexMetaData = + getIndexMetaData(getMetaDataAtTime(kvCatalog, nss, afterIndexBuild), "a_1"); ASSERT(indexMetaData.ready); ASSERT(indexMetaData.multikey); + ASSERT_EQ(std::size_t(1), indexMetaData.multikeyPaths.size()); const bool match = indexMetaData.multikeyPaths[0] == std::set<std::size_t>({0}); if (!match) { @@ -1917,6 +1933,114 @@ public: } }; +/** + * There are a few scenarios where a primary will be using the IndexBuilder thread to build + * indexes. Specifically, when a primary builds an index from an oplog entry which can happen on + * primary catch-up, drain, a secondary step-up or `applyOps`. + * + * This test will exercise IndexBuilder code on primaries by performing a background index build + * via an `applyOps` command. + */ +template <bool Foreground> +class TimestampIndexBuilderOnPrimary : public StorageTimestampTest { +public: + void run() { + // Only run on 'wiredTiger'. No other storage engines to-date support timestamp writes. + if (mongo::storageGlobalParams.engine != "wiredTiger") { + return; + } + + // In order for applyOps to assign timestamps, we must be in non-replicated mode. + repl::UnreplicatedWritesBlock uwb(_opCtx); + + std::string dbName = "unittest"; + NamespaceString nss(dbName, "indexBuilderOnPrimary"); + BSONObj doc = BSON("_id" << 1 << "field" << 1); + + const LogicalTime setupStart = _clock->reserveTicks(1); + + UUID collUUID = UUID::gen(); + { + // Create the collection and insert a document. + reset(nss); + AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_X, LockMode::MODE_IX); + collUUID = *(autoColl.getCollection()->uuid()); + WriteUnitOfWork wuow(_opCtx); + insertDocument(autoColl.getCollection(), + InsertStatement(doc, setupStart.asTimestamp(), 0LL)); + wuow.commit(); + } + + + { + // Sanity check everything exists. + AutoGetCollectionForReadCommand autoColl(_opCtx, nss); + auto coll = autoColl.getCollection(); + ASSERT(coll); + + const auto presentTs = _clock->getClusterTime().asTimestamp(); + assertDocumentAtTimestamp(coll, presentTs, doc); + } + + { + // Create a background index via `applyOps`. We will timestamp the beginning at + // `startBuildTs` and the end, due to manipulation of the logical clock, should be + // timestamped at `endBuildTs`. + const auto beforeBuildTime = _clock->reserveTicks(3); + const auto startBuildTs = beforeBuildTime.addTicks(1).asTimestamp(); + const auto endBuildTs = beforeBuildTime.addTicks(3).asTimestamp(); + + // Grab the existing idents to identify the ident created by the index build. + auto kvStorageEngine = + dynamic_cast<KVStorageEngine*>(_opCtx->getServiceContext()->getStorageEngine()); + KVCatalog* kvCatalog = kvStorageEngine->getCatalog(); + std::vector<std::string> origIdents = kvCatalog->getAllIdents(_opCtx); + + auto indexSpec = BSON("createIndexes" << nss.coll() << "ns" << nss.ns() << "v" + << static_cast<int>(kIndexVersion) + << "key" + << BSON("field" << 1) + << "name" + << "field_1" + << "background" + << (Foreground ? false : true)); + + auto createIndexOp = + BSON("ts" << startBuildTs << "t" << 1LL << "h" << 0xBEEFBEEFLL << "v" << 2 << "op" + << "c" + << "ns" + << nss.getCommandNS().ns() + << "ui" + << collUUID + << "o" + << indexSpec); + + ASSERT_OK(doAtomicApplyOps(nss.db().toString(), {createIndexOp})); + + const std::string indexIdent = getNewIndexIdent(kvCatalog, origIdents); + assertIdentsMissingAtTimestamp( + kvCatalog, "", indexIdent, beforeBuildTime.asTimestamp()); + assertIdentsExistAtTimestamp(kvCatalog, "", indexIdent, startBuildTs); + if (Foreground) { + // In the Foreground case, the index build should start and finish at + // `startBuildTs`. + ASSERT_TRUE( + getIndexMetaData(getMetaDataAtTime(kvCatalog, nss, startBuildTs), "field_1") + .ready); + } else { + // In the Background case, the index build should not be "ready" at `startBuildTs`. + ASSERT_FALSE( + getIndexMetaData(getMetaDataAtTime(kvCatalog, nss, startBuildTs), "field_1") + .ready); + assertIdentsExistAtTimestamp(kvCatalog, "", indexIdent, endBuildTs); + ASSERT_TRUE( + getIndexMetaData(getMetaDataAtTime(kvCatalog, nss, endBuildTs), "field_1") + .ready); + } + } + } +}; + class AllStorageTimestampTests : public unittest::Suite { public: @@ -1945,6 +2069,9 @@ public: // TimestampIndexBuilds<SimulatePrimary> add<TimestampIndexBuilds<false>>(); add<TimestampIndexBuilds<true>>(); + // TimestampIndexBuilderOnPrimary<Background> + add<TimestampIndexBuilderOnPrimary<false>>(); + add<TimestampIndexBuilderOnPrimary<true>>(); add<SecondaryReadsDuringBatchApplicationAreAllowed>(); } }; |