diff options
author | Daniel Gottlieb <daniel.gottlieb@mongodb.com> | 2018-02-21 22:13:20 -0500 |
---|---|---|
committer | Daniel Gottlieb <daniel.gottlieb@mongodb.com> | 2018-02-21 22:13:23 -0500 |
commit | 80b1a54a112b5853d0903ae424ffc5e3bb289077 (patch) | |
tree | 3b2cad4010d1deb12a7f91686f5437c48822c896 /src/mongo | |
parent | 3157be3048cdeb676579ed0d860d8416cb8c4667 (diff) | |
download | mongo-80b1a54a112b5853d0903ae424ffc5e3bb289077.tar.gz |
SERVER-33359: Allow RTT storage engines to manage indexes on rollback.
Diffstat (limited to 'src/mongo')
17 files changed, 135 insertions, 29 deletions
diff --git a/src/mongo/db/catalog/collection_catalog_entry.h b/src/mongo/db/catalog/collection_catalog_entry.h index ca07d2b17bc..2bd8bf2d125 100644 --- a/src/mongo/db/catalog/collection_catalog_entry.h +++ b/src/mongo/db/catalog/collection_catalog_entry.h @@ -110,7 +110,9 @@ public: virtual Status removeIndex(OperationContext* opCtx, StringData indexName) = 0; - virtual Status prepareForIndexBuild(OperationContext* opCtx, const IndexDescriptor* spec) = 0; + virtual Status prepareForIndexBuild(OperationContext* opCtx, + const IndexDescriptor* spec, + bool isBackgroundSecondaryBuild) = 0; virtual void indexBuildSuccess(OperationContext* opCtx, StringData indexName) = 0; diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp index 47058be4422..9d51004abd8 100644 --- a/src/mongo/db/catalog/index_catalog_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_impl.cpp @@ -60,6 +60,7 @@ #include "mongo/db/query/collation/collation_spec.h" #include "mongo/db/query/collation/collator_factory_interface.h" #include "mongo/db/query/internal_plans.h" +#include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/repl/replication_coordinator_global.h" #include "mongo/db/server_options.h" #include "mongo/db/service_context.h" @@ -405,7 +406,15 @@ Status IndexCatalogImpl::IndexBuildBlock::init() { /// ---------- setup on disk structures ---------------- - Status status = _collection->getCatalogEntry()->prepareForIndexBuild(_opCtx, descriptor.get()); + bool isBackgroundSecondaryBuild = false; + if (auto replCoord = repl::ReplicationCoordinator::get(_opCtx)) { + isBackgroundSecondaryBuild = + replCoord->getReplicationMode() == repl::ReplicationCoordinator::Mode::modeReplSet && + replCoord->getMemberState().secondary() && _spec["background"].trueValue(); + } + + Status status = _collection->getCatalogEntry()->prepareForIndexBuild( + _opCtx, descriptor.get(), isBackgroundSecondaryBuild); if (!status.isOK()) return status; diff --git a/src/mongo/db/mongod_options.cpp b/src/mongo/db/mongod_options.cpp index e63df877d1c..b3e0582e0b9 100644 --- a/src/mongo/db/mongod_options.cpp +++ b/src/mongo/db/mongod_options.cpp @@ -209,19 +209,26 @@ Status addMongodOptions(moe::OptionSection* options) { "collections within a database into a shared record store.") .hidden(); + // Only allow `noIndexBuildRetry` on standalones to quickly access data. Running with + // `noIndexBuildRetry` is risky in a live replica set. For example, trying to drop a + // collection that did not have its indexes rebuilt results in a crash. general_options .addOptionChaining("noIndexBuildRetry", "noIndexBuildRetry", moe::Switch, "don't retry any index builds that were interrupted by shutdown") - .setSources(moe::SourceAllLegacy); + .setSources(moe::SourceAllLegacy) + .incompatibleWith("replication.replSet") + .incompatibleWith("replication.replSetName"); general_options .addOptionChaining("storage.indexBuildRetry", "", moe::Bool, "don't retry any index builds that were interrupted by shutdown") - .setSources(moe::SourceYAMLConfig); + .setSources(moe::SourceYAMLConfig) + .incompatibleWith("replication.replSet") + .incompatibleWith("replication.replSetName"); storage_options .addOptionChaining("noprealloc", diff --git a/src/mongo/db/repair_database_and_check_version.cpp b/src/mongo/db/repair_database_and_check_version.cpp index 88c7e50a851..bf847c31d1a 100644 --- a/src/mongo/db/repair_database_and_check_version.cpp +++ b/src/mongo/db/repair_database_and_check_version.cpp @@ -44,6 +44,7 @@ #include "mongo/db/repair_database.h" #include "mongo/db/repl/drop_pending_collection_reaper.h" #include "mongo/db/repl/replication_coordinator_global.h" +#include "mongo/db/server_options.h" #include "mongo/db/storage/mmap_v1/mmap_v1_options.h" #include "mongo/util/log.h" #include "mongo/util/quick_exit.h" @@ -265,6 +266,17 @@ StatusWith<bool> repairDatabasesAndCheckVersion(OperationContext* opCtx) { StatusWith<std::vector<StorageEngine::CollectionIndexNamePair>> swIndexesToRebuild = storageEngine->reconcileCatalogAndIdents(opCtx); fassertStatusOK(40593, swIndexesToRebuild); + + if (!swIndexesToRebuild.getValue().empty() && serverGlobalParams.indexBuildRetry) { + log() << "note: restart the server with --noIndexBuildRetry " + << "to skip index rebuilds"; + } + + if (!serverGlobalParams.indexBuildRetry) { + log() << " not rebuilding interrupted indexes"; + swIndexesToRebuild.getValue().clear(); + } + for (auto&& collIndexPair : swIndexesToRebuild.getValue()) { const std::string& coll = collIndexPair.first; const std::string& indexName = collIndexPair.second; diff --git a/src/mongo/db/storage/bson_collection_catalog_entry.cpp b/src/mongo/db/storage/bson_collection_catalog_entry.cpp index 57440d12e4d..9703e5f1a89 100644 --- a/src/mongo/db/storage/bson_collection_catalog_entry.cpp +++ b/src/mongo/db/storage/bson_collection_catalog_entry.cpp @@ -278,6 +278,7 @@ BSONObj BSONCollectionCatalogEntry::MetaData::toBSON() const { sub.append("head", static_cast<long long>(indexes[i].head.repr())); sub.append("prefix", indexes[i].prefix.toBSONValue()); + sub.append("backgroundSecondary", indexes[i].isBackgroundSecondaryBuild); sub.doneFast(); } arr.doneFast(); @@ -314,6 +315,9 @@ void BSONCollectionCatalogEntry::MetaData::parse(const BSONObj& obj) { } imd.prefix = KVPrefix::fromBSONElement(idx["prefix"]); + auto bgSecondary = BSONElement(idx["backgroundSecondary"]); + // Opt-in to rebuilding behavior for old-format index catalog objects. + imd.isBackgroundSecondaryBuild = bgSecondary.eoo() || bgSecondary.trueValue(); indexes.push_back(imd); } } diff --git a/src/mongo/db/storage/bson_collection_catalog_entry.h b/src/mongo/db/storage/bson_collection_catalog_entry.h index 6b950cd1a3b..dffcecca32c 100644 --- a/src/mongo/db/storage/bson_collection_catalog_entry.h +++ b/src/mongo/db/storage/bson_collection_catalog_entry.h @@ -75,8 +75,14 @@ public: struct IndexMetaData { IndexMetaData() {} - IndexMetaData(BSONObj s, bool r, RecordId h, bool m, KVPrefix prefix) - : spec(s), ready(r), head(h), multikey(m), prefix(prefix) {} + IndexMetaData( + BSONObj s, bool r, RecordId h, bool m, KVPrefix prefix, bool isBackgroundSecondaryBuild) + : spec(s), + ready(r), + head(h), + multikey(m), + prefix(prefix), + isBackgroundSecondaryBuild(isBackgroundSecondaryBuild) {} void updateTTLSetting(long long newExpireSeconds); @@ -89,6 +95,7 @@ public: RecordId head; bool multikey; KVPrefix prefix = KVPrefix::kNotPrefixed; + bool isBackgroundSecondaryBuild; // If non-empty, 'multikeyPaths' is a vector with size equal to the number of elements in // the index key pattern. Each element in the vector is an ordered set of positions diff --git a/src/mongo/db/storage/kv/kv_catalog.cpp b/src/mongo/db/storage/kv/kv_catalog.cpp index 8d0ebcd6b1a..86ff663d7c1 100644 --- a/src/mongo/db/storage/kv/kv_catalog.cpp +++ b/src/mongo/db/storage/kv/kv_catalog.cpp @@ -470,8 +470,8 @@ BSONObj KVCatalog::_findEntry(OperationContext* opCtx, StringData ns, RecordId* return data.releaseToBson().getOwned(); } -const BSONCollectionCatalogEntry::MetaData KVCatalog::getMetaData(OperationContext* opCtx, - StringData ns) { +BSONCollectionCatalogEntry::MetaData KVCatalog::getMetaData(OperationContext* opCtx, + StringData ns) const { BSONObj obj = _findEntry(opCtx, ns); LOG(3) << " fetched CCE metadata: " << obj; BSONCollectionCatalogEntry::MetaData md; diff --git a/src/mongo/db/storage/kv/kv_catalog.h b/src/mongo/db/storage/kv/kv_catalog.h index aded9fd18a3..61c6541bd46 100644 --- a/src/mongo/db/storage/kv/kv_catalog.h +++ b/src/mongo/db/storage/kv/kv_catalog.h @@ -75,7 +75,7 @@ public: std::string getIndexIdent(OperationContext* opCtx, StringData ns, StringData idName) const; - const BSONCollectionCatalogEntry::MetaData getMetaData(OperationContext* opCtx, StringData ns); + BSONCollectionCatalogEntry::MetaData getMetaData(OperationContext* opCtx, StringData ns) const; void putMetaData(OperationContext* opCtx, StringData ns, BSONCollectionCatalogEntry::MetaData& md); diff --git a/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp b/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp index 05f9f927c79..2e1a787bd78 100644 --- a/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp +++ b/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp @@ -178,11 +178,13 @@ Status KVCollectionCatalogEntry::removeIndex(OperationContext* opCtx, StringData } Status KVCollectionCatalogEntry::prepareForIndexBuild(OperationContext* opCtx, - const IndexDescriptor* spec) { + const IndexDescriptor* spec, + bool isBackgroundSecondaryBuild) { MetaData md = _getMetaData(opCtx); KVPrefix prefix = KVPrefix::getNextPrefix(ns()); - IndexMetaData imd(spec->infoObj(), false, RecordId(), false, prefix); + IndexMetaData imd( + spec->infoObj(), false, RecordId(), false, prefix, isBackgroundSecondaryBuild); if (indexTypeSupportsPathLevelMultikeyTracking(spec->getAccessMethodName())) { const auto feature = KVCatalog::FeatureTracker::RepairableFeature::kPathLevelMultikeyTracking; diff --git a/src/mongo/db/storage/kv/kv_collection_catalog_entry.h b/src/mongo/db/storage/kv/kv_collection_catalog_entry.h index 9cae8936e95..609793f0ebb 100644 --- a/src/mongo/db/storage/kv/kv_collection_catalog_entry.h +++ b/src/mongo/db/storage/kv/kv_collection_catalog_entry.h @@ -64,7 +64,9 @@ public: Status removeIndex(OperationContext* opCtx, StringData indexName) final; - Status prepareForIndexBuild(OperationContext* opCtx, const IndexDescriptor* spec) final; + Status prepareForIndexBuild(OperationContext* opCtx, + const IndexDescriptor* spec, + bool isBackgroundSecondaryBuild) final; void indexBuildSuccess(OperationContext* opCtx, StringData indexName) final; diff --git a/src/mongo/db/storage/kv/kv_collection_catalog_entry_test.cpp b/src/mongo/db/storage/kv/kv_collection_catalog_entry_test.cpp index 252d0dd2491..d2e4a77f7e9 100644 --- a/src/mongo/db/storage/kv/kv_collection_catalog_entry_test.cpp +++ b/src/mongo/db/storage/kv/kv_collection_catalog_entry_test.cpp @@ -97,7 +97,9 @@ public: { WriteUnitOfWork wuow(opCtx.get()); - ASSERT_OK(getCollectionCatalogEntry()->prepareForIndexBuild(opCtx.get(), &desc)); + const bool isSecondaryBackgroundIndexBuild = false; + ASSERT_OK(getCollectionCatalogEntry()->prepareForIndexBuild( + opCtx.get(), &desc, isSecondaryBackgroundIndexBuild)); wuow.commit(); } diff --git a/src/mongo/db/storage/kv/kv_engine_test_harness.cpp b/src/mongo/db/storage/kv/kv_engine_test_harness.cpp index bef9943d5c1..4ca06132c0c 100644 --- a/src/mongo/db/storage/kv/kv_engine_test_harness.cpp +++ b/src/mongo/db/storage/kv/kv_engine_test_harness.cpp @@ -250,7 +250,8 @@ TEST(KVCatalogTest, Idx1) { false, RecordId(), false, - KVPrefix::kNotPrefixed)); + KVPrefix::kNotPrefixed, + false)); catalog->putMetaData(&opCtx, "a.b", md); uow.commit(); } @@ -279,7 +280,8 @@ TEST(KVCatalogTest, Idx1) { false, RecordId(), false, - KVPrefix::kNotPrefixed)); + KVPrefix::kNotPrefixed, + false)); catalog->putMetaData(&opCtx, "a.b", md); uow.commit(); } @@ -326,7 +328,8 @@ TEST(KVCatalogTest, DirectoryPerDb1) { false, RecordId(), false, - KVPrefix::kNotPrefixed)); + KVPrefix::kNotPrefixed, + false)); catalog->putMetaData(&opCtx, "a.b", md); ASSERT_STRING_CONTAINS(catalog->getIndexIdent(&opCtx, "a.b", "foo"), "a/"); ASSERT_TRUE(catalog->isUserDataIdent(catalog->getIndexIdent(&opCtx, "a.b", "foo"))); @@ -370,7 +373,8 @@ TEST(KVCatalogTest, Split1) { false, RecordId(), false, - KVPrefix::kNotPrefixed)); + KVPrefix::kNotPrefixed, + false)); catalog->putMetaData(&opCtx, "a.b", md); ASSERT_STRING_CONTAINS(catalog->getIndexIdent(&opCtx, "a.b", "foo"), "index/"); ASSERT_TRUE(catalog->isUserDataIdent(catalog->getIndexIdent(&opCtx, "a.b", "foo"))); @@ -414,7 +418,8 @@ TEST(KVCatalogTest, DirectoryPerAndSplit1) { false, RecordId(), false, - KVPrefix::kNotPrefixed)); + KVPrefix::kNotPrefixed, + false)); catalog->putMetaData(&opCtx, "a.b", md); ASSERT_STRING_CONTAINS(catalog->getIndexIdent(&opCtx, "a.b", "foo"), "a/index/"); ASSERT_TRUE(catalog->isUserDataIdent(catalog->getIndexIdent(&opCtx, "a.b", "foo"))); @@ -463,7 +468,8 @@ TEST(KVCatalogTest, RestartForPrefixes) { false, RecordId(), false, - fooIndexPrefix)); + fooIndexPrefix, + false)); md.prefix = abCollPrefix; catalog->putMetaData(&opCtx, "a.b", md); uow.commit(); diff --git a/src/mongo/db/storage/kv/kv_storage_engine.cpp b/src/mongo/db/storage/kv/kv_storage_engine.cpp index 436e4a47873..4c8f61ef868 100644 --- a/src/mongo/db/storage/kv/kv_storage_engine.cpp +++ b/src/mongo/db/storage/kv/kv_storage_engine.cpp @@ -219,7 +219,7 @@ KVStorageEngine::reconcileCatalogAndIdents(OperationContext* opCtx) { const auto& identForColl = _catalog->getCollectionIdent(coll); if (engineIdents.find(identForColl) == engineIdents.end()) { return {ErrorCodes::UnrecoverableRollbackError, - str::stream() << "Expected collection does not exist. NS: " << coll + str::stream() << "Expected collection does not exist. Collection: " << coll << " Ident: " << identForColl}; } @@ -227,20 +227,66 @@ KVStorageEngine::reconcileCatalogAndIdents(OperationContext* opCtx) { // Scan all indexes and return those in the catalog where the storage engine does not have the // corresponding ident. The caller is expected to rebuild these indexes. + // + // Also, remove unfinished builds except those that were background index builds started on a + // secondary. std::vector<CollectionIndexNamePair> ret; for (const auto& coll : collections) { - const BSONCollectionCatalogEntry::MetaData metaData = _catalog->getMetaData(opCtx, coll); + BSONCollectionCatalogEntry::MetaData metaData = _catalog->getMetaData(opCtx, coll); + + // Batch up the indexes to remove them from `metaData` outside of the iterator. + std::vector<std::string> indexesToDrop; for (const auto& indexMetaData : metaData.indexes) { const std::string& indexName = indexMetaData.name(); std::string indexIdent = _catalog->getIndexIdent(opCtx, coll, indexName); - if (engineIdents.find(indexIdent) != engineIdents.end()) { + + const bool foundIdent = engineIdents.find(indexIdent) != engineIdents.end(); + // An index drop will immediately remove the ident, but the `indexMetaData` catalog + // entry still exists implying the drop hasn't necessarily been replicated to a + // majority of nodes. The code will rebuild the index, despite potentially + // encountering another `dropIndex` command. + if (indexMetaData.ready && !foundIdent) { + log() << "Expected index data is missing, rebuilding. Collection: " << coll + << " Index: " << indexName; + ret.emplace_back(coll, indexName); + continue; + } + + // If the index was kicked off as a background secondary index build, replication + // recovery will not run into the oplog entry to recreate the index. If the index + // table is not found, or the index build did not successfully complete, this code + // will return the index to be rebuilt. + if (indexMetaData.isBackgroundSecondaryBuild && (!foundIdent || !indexMetaData.ready)) { + log() + << "Expected background index build did not complete, rebuilding. Collection: " + << coll << " Index: " << indexName; + ret.emplace_back(coll, indexName); continue; } - log() << "Expected index data is missing, rebuilding. NS: " << coll - << " Index: " << indexName << " Ident: " << indexIdent; + // The last anomaly is when the index build did not complete, nor was the index build + // a secondary background index build. This implies the index build was on a primary + // and the `createIndexes` command never successfully returned, or the index build was + // a foreground secondary index build, meaning replication recovery will build the + // index when it replays the oplog. In these cases the index entry in the catalog + // should be dropped. + if (!indexMetaData.ready && !indexMetaData.isBackgroundSecondaryBuild) { + log() << "Dropping unfinished index. Collection: " << coll + << " Index: " << indexName; + // Ensure the `ident` is dropped while we have the `indexIdent` value. + fassertStatusOK(50713, _engine->dropIdent(opCtx, indexIdent)); + indexesToDrop.push_back(indexName); + continue; + } + } - ret.push_back(CollectionIndexNamePair(coll, indexName)); + for (auto&& indexName : indexesToDrop) { + dassert(metaData.eraseIndex(indexName)); + } + if (indexesToDrop.size() > 0) { + WriteUnitOfWork wuow(opCtx); + _catalog->putMetaData(opCtx, coll, metaData); + wuow.commit(); } } diff --git a/src/mongo/db/storage/kv/kv_storage_engine_test.cpp b/src/mongo/db/storage/kv/kv_storage_engine_test.cpp index e3c814efcec..7e0b0bfec7a 100644 --- a/src/mongo/db/storage/kv/kv_storage_engine_test.cpp +++ b/src/mongo/db/storage/kv/kv_storage_engine_test.cpp @@ -125,7 +125,8 @@ public: DatabaseCatalogEntry* dbce = _storageEngine->getDatabaseCatalogEntry(opCtx, collNs.db()); CollectionCatalogEntry* cce = dbce->getCollectionCatalogEntry(collNs.ns()); - auto ret = cce->prepareForIndexBuild(opCtx, descriptor.get()); + const bool isBackgroundSecondaryBuild = false; + auto ret = cce->prepareForIndexBuild(opCtx, descriptor.get(), isBackgroundSecondaryBuild); if (!ret.isOK()) { return ret; } diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp index 75c5365e396..25d45ee751b 100644 --- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp +++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.cpp @@ -289,8 +289,8 @@ Status NamespaceDetailsCollectionCatalogEntry::removeIndex(OperationContext* opC return Status::OK(); } -Status NamespaceDetailsCollectionCatalogEntry::prepareForIndexBuild(OperationContext* opCtx, - const IndexDescriptor* desc) { +Status NamespaceDetailsCollectionCatalogEntry::prepareForIndexBuild( + OperationContext* opCtx, const IndexDescriptor* desc, bool isBackgroundSecondaryBuild) { BSONObj spec = desc->infoObj(); // 1) entry in system.indexs // TODO SERVER-30638: using timestamp 0 for these inserts. diff --git a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h index 874f1716881..138cd2ee121 100644 --- a/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h +++ b/src/mongo/db/storage/mmap_v1/catalog/namespace_details_collection_entry.h @@ -90,7 +90,9 @@ public: Status removeIndex(OperationContext* opCtx, StringData indexName) final; - Status prepareForIndexBuild(OperationContext* opCtx, const IndexDescriptor* spec) final; + Status prepareForIndexBuild(OperationContext* opCtx, + const IndexDescriptor* spec, + bool isBackgroundSecondaryBuild) final; void indexBuildSuccess(OperationContext* opCtx, StringData indexName) final; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index 91cd62c6546..4c123023773 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -838,6 +838,10 @@ Status WiredTigerKVEngine::dropIdent(OperationContext* opCtx, StringData ident) return Status::OK(); } + if (ret == ENOENT) { + return Status::OK(); + } + invariantWTOK(ret); return Status::OK(); } |