diff options
author | Henrik Edin <henrik.edin@mongodb.com> | 2021-11-05 12:31:50 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-11-05 13:09:39 +0000 |
commit | 67ef8cd1ed3d15cee09de7905f7ba9fedfba176d (patch) | |
tree | 384696ea320a1b82b13c6074c9bb6e53cfa3027b | |
parent | 02d158c3b2dffbe90e84599739ca05563b5c378b (diff) | |
download | mongo-67ef8cd1ed3d15cee09de7905f7ba9fedfba176d.tar.gz |
SERVER-58736 Add BatchedCollectionCatalogWriter class
Allows multiple writes to be performed on the CollectionCatalog while reusing a single copy of the CollectionCatalog instance.
Used when opening the catalog to avoid quadratic behavior in the number of Collections in the catalog.
-rw-r--r-- | src/mongo/db/catalog/SConscript | 11 | ||||
-rw-r--r-- | src/mongo/db/catalog/catalog_control.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/catalog/collection_catalog.cpp | 47 | ||||
-rw-r--r-- | src/mongo/db/catalog/collection_catalog.h | 20 | ||||
-rw-r--r-- | src/mongo/db/catalog/collection_catalog_bm.cpp | 120 |
5 files changed, 206 insertions, 0 deletions
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript index bc19fa684c8..48817a012b3 100644 --- a/src/mongo/db/catalog/SConscript +++ b/src/mongo/db/catalog/SConscript @@ -272,6 +272,17 @@ env.Library( ] ) +env.Benchmark( + target='collection_catalog_bm', + source=[ + 'collection_catalog_bm.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/db/concurrency/lock_manager', + 'collection_catalog', + ], +) + env.Library( target='collection_catalog_helper', source=[ diff --git a/src/mongo/db/catalog/catalog_control.cpp b/src/mongo/db/catalog/catalog_control.cpp index 1a79839fde0..10bf24a5515 100644 --- a/src/mongo/db/catalog/catalog_control.cpp +++ b/src/mongo/db/catalog/catalog_control.cpp @@ -56,6 +56,9 @@ void reopenAllDatabasesAndReloadCollectionCatalog( Timestamp stableTimestamp) { // Open all databases and repopulate the CollectionCatalog. LOGV2(20276, "openCatalog: reopening all databases"); + boost::optional<BatchedCollectionCatalogWriter> catalogBatchWriter; + catalogBatchWriter.emplace(opCtx); + auto databaseHolder = DatabaseHolder::get(opCtx); std::vector<std::string> databasesToOpen = storageEngine->listDatabases(); for (auto&& dbName : databasesToOpen) { @@ -90,7 +93,12 @@ void reopenAllDatabasesAndReloadCollectionCatalog( // to the oplog. if (collNss.isOplog()) { LOGV2(20277, "openCatalog: updating cached oplog pointer"); + + // The oplog collection must be visible when establishing for repl. Finish our + // batched catalog write and continue on a new batch afterwards. 
+ catalogBatchWriter.reset(); collection->establishOplogCollectionForLogging(opCtx); + catalogBatchWriter.emplace(opCtx); } } } diff --git a/src/mongo/db/catalog/collection_catalog.cpp b/src/mongo/db/catalog/collection_catalog.cpp index e45b1ed2954..d5d3c4da850 100644 --- a/src/mongo/db/catalog/collection_catalog.cpp +++ b/src/mongo/db/catalog/collection_catalog.cpp @@ -51,6 +51,8 @@ struct LatestCollectionCatalog { const ServiceContext::Decoration<LatestCollectionCatalog> getCatalog = ServiceContext::declareDecoration<LatestCollectionCatalog>(); +std::shared_ptr<CollectionCatalog> batchedCatalogWriteInstance; + /** * Decoration on OperationContext to store cloned Collections until they are committed or rolled * back TODO SERVER-51236: This should be merged with UncommittedCollections @@ -398,6 +400,13 @@ std::shared_ptr<const CollectionCatalog> CollectionCatalog::get(ServiceContext* } std::shared_ptr<const CollectionCatalog> CollectionCatalog::get(OperationContext* opCtx) { + // If there is a batched catalog write ongoing and we are the one doing it return this instance + // so we can observe our own writes. There may be other callers that read the CollectionCatalog + // without any locks, they must see the immutable regular instance. + if (batchedCatalogWriteInstance && opCtx->lockState()->isW()) { + return batchedCatalogWriteInstance; + } + const auto& stashed = stashedCatalog(opCtx); if (stashed) return stashed; @@ -410,6 +419,11 @@ void CollectionCatalog::stash(OperationContext* opCtx, } void CollectionCatalog::write(ServiceContext* svcCtx, CatalogWriteFn job) { + // We should never have ongoing batching here. When batching is in progress the caller should + // use the overload with OperationContext so we can verify that the global exclusive lock is + // being held. 
+ invariant(!batchedCatalogWriteInstance); + // It is potentially expensive to copy the collection catalog so we batch the operations by only // having one concurrent thread copying the catalog and executing all the write jobs. @@ -532,6 +546,14 @@ void CollectionCatalog::write(ServiceContext* svcCtx, CatalogWriteFn job) { void CollectionCatalog::write(OperationContext* opCtx, std::function<void(CollectionCatalog&)> job) { + // If the global MODE_X lock is held we can re-use a cloned CollectionCatalog instance when + // 'batchedCatalogWriteInstance' is set. Make sure we are the one holding the write lock. + if (batchedCatalogWriteInstance) { + invariant(opCtx->lockState()->isW()); + job(*batchedCatalogWriteInstance); + return; + } + write(opCtx->getServiceContext(), std::move(job)); } @@ -1192,4 +1214,29 @@ const Collection* LookupCollectionForYieldRestore::operator()(OperationContext* return collection; } +BatchedCollectionCatalogWriter::BatchedCollectionCatalogWriter(OperationContext* opCtx) + : _opCtx(opCtx) { + invariant(_opCtx->lockState()->isW()); + invariant(!batchedCatalogWriteInstance); + + auto& storage = getCatalog(_opCtx->getServiceContext()); + // hold onto base so if we need to delete it we can do it outside of the lock + _base = atomic_load(&storage.catalog); + // copy the collection catalog, this could be expensive, store it for future writes during this + // batcher + batchedCatalogWriteInstance = std::make_shared<CollectionCatalog>(*_base); +} +BatchedCollectionCatalogWriter::~BatchedCollectionCatalogWriter() { + invariant(_opCtx->lockState()->isW()); + + // Publish our batched instance, validate that no other writers have been able to write during + // the batcher. 
+ auto& storage = getCatalog(_opCtx->getServiceContext()); + invariant( + atomic_compare_exchange_strong(&storage.catalog, &_base, batchedCatalogWriteInstance)); + + // Clear out batched pointer so no more attempts of batching are made + batchedCatalogWriteInstance = nullptr; +} + } // namespace mongo diff --git a/src/mongo/db/catalog/collection_catalog.h b/src/mongo/db/catalog/collection_catalog.h index 21fa2122928..459298b6e7c 100644 --- a/src/mongo/db/catalog/collection_catalog.h +++ b/src/mongo/db/catalog/collection_catalog.h @@ -522,4 +522,24 @@ struct LookupCollectionForYieldRestore { const Collection* operator()(OperationContext* opCtx, CollectionUUID uuid) const; }; +/** + * RAII class to perform multiple writes to the CollectionCatalog on a single copy of the + * CollectionCatalog instance. Requires the global lock to be held in exclusive write mode (MODE_X) + * for the lifetime of this object. + */ +class BatchedCollectionCatalogWriter { +public: + BatchedCollectionCatalogWriter(OperationContext* opCtx); + ~BatchedCollectionCatalogWriter(); + + BatchedCollectionCatalogWriter(const BatchedCollectionCatalogWriter&) = delete; + BatchedCollectionCatalogWriter(BatchedCollectionCatalogWriter&&) = delete; + +private: + OperationContext* _opCtx; + // Store base when we clone the CollectionCatalog so we can verify that there has been no other + // writers during the batching. + std::shared_ptr<CollectionCatalog> _base = nullptr; +}; + } // namespace mongo diff --git a/src/mongo/db/catalog/collection_catalog_bm.cpp b/src/mongo/db/catalog/collection_catalog_bm.cpp new file mode 100644 index 00000000000..eedb6bf1e1c --- /dev/null +++ b/src/mongo/db/catalog/collection_catalog_bm.cpp @@ -0,0 +1,120 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#include <benchmark/benchmark.h> + +#include "mongo/db/catalog/collection_catalog.h" +#include "mongo/db/catalog/collection_mock.h" +#include "mongo/db/concurrency/d_concurrency.h" +#include "mongo/db/concurrency/lock_state.h" +#include "mongo/db/operation_context.h" +#include "mongo/db/service_context.h" +#include "mongo/util/uuid.h" + +namespace mongo { + +namespace { + +class LockerImplClientObserver : public ServiceContext::ClientObserver { +public: + LockerImplClientObserver() = default; + ~LockerImplClientObserver() = default; + + void onCreateClient(Client* client) final {} + + void onDestroyClient(Client* client) final {} + + void onCreateOperationContext(OperationContext* opCtx) override { + opCtx->setLockState(std::make_unique<LockerImpl>()); + } + + void onDestroyOperationContext(OperationContext* opCtx) final {} +}; + +const ServiceContext::ConstructorActionRegisterer clientObserverRegisterer{ + "CollectionCatalogBenchmarkClientObserver", + [](ServiceContext* service) { + service->registerClientObserver(std::make_unique<LockerImplClientObserver>()); + }, + [](ServiceContext* serviceContext) {}}; + +ServiceContext* setupServiceContext() { + auto serviceContext = ServiceContext::make(); + auto serviceContextPtr = serviceContext.get(); + setGlobalServiceContext(std::move(serviceContext)); + return serviceContextPtr; +} + +void createCollections(OperationContext* opCtx, int numCollections) { + Lock::GlobalLock globalLk(opCtx, MODE_X); + BatchedCollectionCatalogWriter batched(opCtx); + + for (auto i = 0; i < numCollections; i++) { + const NamespaceString nss("collection_catalog_bm", std::to_string(i)); + CollectionCatalog::write(opCtx, [&](CollectionCatalog& catalog) { + catalog.registerCollection(opCtx, UUID::gen(), std::make_shared<CollectionMock>(nss)); + }); + } +} + +} // namespace + +void BM_CollectionCatalogWrite(benchmark::State& state) { + auto serviceContext = setupServiceContext(); + ThreadClient threadClient(serviceContext); + 
ServiceContext::UniqueOperationContext opCtx = threadClient->makeOperationContext(); + + createCollections(opCtx.get(), state.range(0)); + + for (auto _ : state) { + benchmark::ClobberMemory(); + CollectionCatalog::write(opCtx.get(), [&](CollectionCatalog& catalog) {}); + } +} + +void BM_CollectionCatalogWriteBatchedWithGlobalExclusiveLock(benchmark::State& state) { + auto serviceContext = setupServiceContext(); + ThreadClient threadClient(serviceContext); + ServiceContext::UniqueOperationContext opCtx = threadClient->makeOperationContext(); + + createCollections(opCtx.get(), state.range(0)); + + Lock::GlobalLock globalLk(opCtx.get(), MODE_X); + BatchedCollectionCatalogWriter batched(opCtx.get()); + + for (auto _ : state) { + benchmark::ClobberMemory(); + CollectionCatalog::write(opCtx.get(), [&](CollectionCatalog& catalog) {}); + } +} + +BENCHMARK(BM_CollectionCatalogWrite)->Ranges({{{1}, {100'000}}}); +BENCHMARK(BM_CollectionCatalogWriteBatchedWithGlobalExclusiveLock)->Ranges({{{1}, {100'000}}}); + +} // namespace mongo |