From 6e0f8d23b304375591c7f5bce257cea348ae1928 Mon Sep 17 00:00:00 2001 From: Haley Connelly Date: Mon, 9 Jan 2023 15:10:47 +0000 Subject: SERVER-67290 Support repair on clustered collections with missing _mdb_catalog (cherry picked from commit bbfaa9d) --- jstests/disk/repair_clustered_collection.js | 87 ++++++++++++++++++++++ src/mongo/db/storage/SConscript | 1 + src/mongo/db/storage/durable_catalog.h | 7 +- src/mongo/db/storage/durable_catalog_impl.cpp | 7 +- src/mongo/db/storage/durable_catalog_impl.h | 4 +- src/mongo/db/storage/kv/kv_engine.h | 7 ++ src/mongo/db/storage/storage_engine_impl.cpp | 32 ++++++-- .../db/storage/wiredtiger/wiredtiger_kv_engine.cpp | 7 ++ .../db/storage/wiredtiger/wiredtiger_kv_engine.h | 2 + 9 files changed, 138 insertions(+), 16 deletions(-) create mode 100644 jstests/disk/repair_clustered_collection.js diff --git a/jstests/disk/repair_clustered_collection.js b/jstests/disk/repair_clustered_collection.js new file mode 100644 index 00000000000..001ac2dbeb2 --- /dev/null +++ b/jstests/disk/repair_clustered_collection.js @@ -0,0 +1,87 @@ +/** + * Tests that --repair on WiredTiger correctly and gracefully handles a missing _mdb_catalog when + * a clustered collection exists on the server instance. 
+ * + * @tags: [requires_wiredtiger] + */ +(function() { + +load('jstests/disk/libs/wt_file_helper.js'); +load("jstests/libs/collection_drop_recreate.js"); + +const dbName = jsTestName(); +const collName = "test"; +const dbpath = MongoRunner.dataPath + dbName + "/"; + +const runRepairTest = function runRepairTestOnMongoDInstance( + collectionOptions, docToInsert, isTimeseries) { + let mongod = startMongodOnExistingPath(dbpath); + let db = mongod.getDB(dbName); + + assertDropCollection(db, collName); + assertCreateCollection(db, collName, collectionOptions); + + let testColl = db[collName]; + let testCollUri = getUriForColl(testColl); + let testCollFile = dbpath + testCollUri + ".wt"; + + assert.commandWorked(testColl.insert(docToInsert)); + + // A document repaired from a timeseries collection will be in a different format than the + // original document. This is because the timeseries's system.views collection will not be + // associated with the orphaned clustered collection. Thus, the data will show up as it would + // have in the raw system.buckets collection for the timeseries collection. + const expectedOrphanDoc = + isTimeseries ? db["system.buckets." + collName].findOne() : testColl.findOne(); + + MongoRunner.stopMongod(mongod); + + // Delete the _mdb_catalog. + let mdbCatalogFile = dbpath + "_mdb_catalog.wt"; + jsTestLog("deleting catalog file: " + mdbCatalogFile); + removeFile(mdbCatalogFile); + + assertRepairSucceeds(dbpath, mongod.port); + + // Verify that repair succeeds in creating an empty catalog and MongoDB starts up normally with + // no data. + mongod = startMongodOnExistingPath(dbpath); + db = mongod.getDB(dbName); + testColl = db[collName]; + assert.isnull(testColl.exists()); + assert.eq(testColl.find(docToInsert).itcount(), 0); + assert.eq(testColl.count(), 0); + + // Ensure the orphaned collection is valid and the document is preserved. + const orphanedImportantCollName = "orphan."
+ testCollUri.replace(/-/g, "_"); + const localDb = mongod.getDB("local"); + orphanedCollection = localDb[orphanedImportantCollName]; + assert(orphanedCollection.exists()); + assert.eq(orphanedCollection.count(expectedOrphanDoc), + 1, + `Expected to find document ${tojson(expectedOrphanDoc)} but collection has contents ${ + tojson(orphanedCollection.find().toArray())}`); + + const validateResult = orphanedCollection.validate(); + assert(validateResult.valid); + MongoRunner.stopMongod(mongod); +}; + +// Standard clustered collection test. +let isTimeseries = false; +let clusteredCollOptions = {clusteredIndex: {key: {_id: 1}, unique: true}}; +let docToInsert = {_id: 1}; +runRepairTest(clusteredCollOptions, docToInsert, isTimeseries); + +// Timeseries test since all timeseries collections are implicitly clustered. +isTimeseries = true; +clusteredCollOptions = { + timeseries: {timeField: "timestamp", metaField: "metadata", granularity: "hours"} +}; +docToInsert = { + "metadata": {"sensorId": 5578, "type": "temperature"}, + "timestamp": ISODate("2021-05-18T00:00:00.000Z"), + "temp": 12 +}; +runRepairTest(clusteredCollOptions, docToInsert, isTimeseries); +})(); diff --git a/src/mongo/db/storage/SConscript b/src/mongo/db/storage/SConscript index 45931873ba3..6081de4589c 100644 --- a/src/mongo/db/storage/SConscript +++ b/src/mongo/db/storage/SConscript @@ -623,6 +623,7 @@ env.Library( LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/audit', '$BUILD_DIR/mongo/db/catalog/catalog_helpers', + '$BUILD_DIR/mongo/db/catalog/clustered_collection_options', '$BUILD_DIR/mongo/db/catalog/index_catalog', '$BUILD_DIR/mongo/db/multitenancy', '$BUILD_DIR/mongo/db/resumable_index_builds_idl', diff --git a/src/mongo/db/storage/durable_catalog.h b/src/mongo/db/storage/durable_catalog.h index 49db4546918..d41e8173883 100644 --- a/src/mongo/db/storage/durable_catalog.h +++ b/src/mongo/db/storage/durable_catalog.h @@ -123,11 +123,12 @@ public: /** * Create an entry in the catalog for an orphaned 
collection found in the * storage engine. Return the generated ns of the collection. - * Note that this function does not recreate the _id index on the collection because it does not - * have access to index catalog. + * Note that this function does not recreate the _id index for non-clustered collections + * because it does not have access to index catalog. */ virtual StatusWith newOrphanedIdent(OperationContext* opCtx, - std::string ident) = 0; + std::string ident, + const CollectionOptions& optionsWithUUID) = 0; virtual std::string getFilesystemPathForDb(const std::string& dbName) const = 0; diff --git a/src/mongo/db/storage/durable_catalog_impl.cpp b/src/mongo/db/storage/durable_catalog_impl.cpp index 13b86f9af07..c4ef8527e52 100644 --- a/src/mongo/db/storage/durable_catalog_impl.cpp +++ b/src/mongo/db/storage/durable_catalog_impl.cpp @@ -554,8 +554,8 @@ bool DurableCatalogImpl::isCollectionIdent(StringData ident) const { ident.find("collection/") != std::string::npos; } -StatusWith DurableCatalogImpl::newOrphanedIdent(OperationContext* opCtx, - std::string ident) { +StatusWith DurableCatalogImpl::newOrphanedIdent( + OperationContext* opCtx, std::string ident, const CollectionOptions& optionsWithUUID) { // The collection will be named local.orphan.xxxxx. std::string identNs = ident; std::replace(identNs.begin(), identNs.end(), '-', '_'); @@ -563,9 +563,6 @@ StatusWith DurableCatalogImpl::newOrphanedIdent(OperationContext* o NamespaceString nss(NamespaceString(NamespaceString::kOrphanCollectionDb, NamespaceString::kOrphanCollectionPrefix + identNs)); - // Generate a new UUID for the orphaned collection.
- CollectionOptions optionsWithUUID; - optionsWithUUID.uuid.emplace(UUID::gen()); BSONObj obj; { BSONObjBuilder b; diff --git a/src/mongo/db/storage/durable_catalog_impl.h b/src/mongo/db/storage/durable_catalog_impl.h index 873733d58b4..bedaf4a79af 100644 --- a/src/mongo/db/storage/durable_catalog_impl.h +++ b/src/mongo/db/storage/durable_catalog_impl.h @@ -98,7 +98,9 @@ public: return _rs; } - StatusWith newOrphanedIdent(OperationContext* opCtx, std::string ident); + StatusWith newOrphanedIdent(OperationContext* opCtx, + std::string ident, + const CollectionOptions& optionsWithUUID); std::string getFilesystemPathForDb(const std::string& dbName) const; diff --git a/src/mongo/db/storage/kv/kv_engine.h b/src/mongo/db/storage/kv/kv_engine.h index 25f52f0e40f..304a695308e 100644 --- a/src/mongo/db/storage/kv/kv_engine.h +++ b/src/mongo/db/storage/kv/kv_engine.h @@ -421,6 +421,13 @@ public: return Status::OK(); } + /** + * Returns the 'KeyFormat' tied to 'ident'. + */ + virtual KeyFormat getKeyFormat(OperationContext* opCtx, StringData ident) const { + MONGO_UNREACHABLE; + } + /** * The destructor will never be called from mongod, but may be called from tests. 
* Engines may assume that this will only be called in the case of clean shutdown, even if diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp index 255f80efec1..6e9c57de195 100644 --- a/src/mongo/db/storage/storage_engine_impl.cpp +++ b/src/mongo/db/storage/storage_engine_impl.cpp @@ -35,6 +35,7 @@ #include "mongo/db/audit.h" #include "mongo/db/catalog/catalog_control.h" +#include "mongo/db/catalog/clustered_collection_util.h" #include "mongo/db/catalog/collection_catalog.h" #include "mongo/db/catalog/collection_catalog_helper.h" #include "mongo/db/catalog_raii.h" @@ -201,19 +202,36 @@ void StorageEngineImpl::loadCatalog(OperationContext* opCtx, LastShutdownState l // If the catalog does not have information about this // collection, we create an new entry for it. WriteUnitOfWork wuow(opCtx); - StatusWith statusWithNs = _catalog->newOrphanedIdent(opCtx, ident); + + auto keyFormat = _engine->getKeyFormat(opCtx, ident); + bool isClustered = keyFormat == KeyFormat::String; + CollectionOptions optionsWithUUID; + optionsWithUUID.uuid.emplace(UUID::gen()); + if (isClustered) { + optionsWithUUID.clusteredIndex = + clustered_util::makeDefaultClusteredIdIndex(); + } + + StatusWith statusWithNs = + _catalog->newOrphanedIdent(opCtx, ident, optionsWithUUID); + if (statusWithNs.isOK()) { wuow.commit(); auto orphanCollNs = statusWithNs.getValue(); LOGV2(22247, "Successfully created an entry in the catalog for orphaned " "collection", - "namespace"_attr = orphanCollNs); - LOGV2_WARNING(22265, - "Collection does not have an _id index. Please manually " - "build the index", - "namespace"_attr = orphanCollNs); - + "namespace"_attr = orphanCollNs, + "options"_attr = optionsWithUUID); + + if (!isClustered) { + // The _id index is already implicitly created on collections clustered + // by _id. + LOGV2_WARNING(22265, + "Collection does not have an _id index. 
Please manually " + "build the index", + "namespace"_attr = orphanCollNs); + } StorageRepairObserver::get(getGlobalServiceContext()) ->benignModification(str::stream() << "Orphan collection created: " << statusWithNs.getValue()); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index 575335efe75..048cac044d2 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -2756,4 +2756,11 @@ Status WiredTigerKVEngine::reconfigureLogging() { return wtRCToStatus(_conn->reconfigure(_conn, verboseConfig.c_str()), nullptr); } +KeyFormat WiredTigerKVEngine::getKeyFormat(OperationContext* opCtx, StringData ident) const { + + const std::string wtTableConfig = + uassertStatusOK(WiredTigerUtil::getMetadataCreate(opCtx, "table:{}"_format(ident))); + return wtTableConfig.find("key_format=u") != string::npos ? KeyFormat::String : KeyFormat::Long; +} + } // namespace mongo diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h index 688db855b74..add1d9a9b29 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h @@ -383,6 +383,8 @@ public: Status reconfigureLogging() override; + KeyFormat getKeyFormat(OperationContext* opCtx, StringData ident) const override; + private: class WiredTigerSessionSweeper; -- cgit v1.2.1