diff options
author | Samy Lanka <samy.lanka@mongodb.com> | 2022-01-10 16:59:08 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-01-10 22:30:03 +0000 |
commit | fe5da9a24783d5e3cf6e0f0f3dc2da7086c24ebd (patch) | |
tree | 766152b00bb7da589bd37c7974d659e758784e5f | |
parent | ba751252858213dd59b7d4c0b0d62f667e39d230 (diff) | |
download | mongo-fe5da9a24783d5e3cf6e0f0f3dc2da7086c24ebd.tar.gz |
SERVER-61934 Hold storage change lock while creating a new opCtx to prevent races with switching out the storage engine
(cherry picked from commit 9b60d431ed30814490c625c451b5c0e66a28d928)
-rw-r--r-- | src/mongo/db/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/service_context.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/service_context.h | 13 | ||||
-rw-r--r-- | src/mongo/db/storage/SConscript | 11 | ||||
-rw-r--r-- | src/mongo/db/storage/storage_change_lock.cpp | 86 | ||||
-rw-r--r-- | src/mongo/db/storage/storage_change_lock.h | 81 | ||||
-rw-r--r-- | src/mongo/db/storage/storage_engine_change_context.cpp | 60 | ||||
-rw-r--r-- | src/mongo/db/storage/storage_engine_change_context.h | 44 | ||||
-rw-r--r-- | src/mongo/db/storage/storage_engine_init.cpp | 3 |
9 files changed, 202 insertions, 100 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index 91bedb4cf6c..f12b1900b68 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -512,6 +512,7 @@ env.Library( 'write_concern_options', ], LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/storage/storage_change_lock', '$BUILD_DIR/mongo/util/processinfo', ], ) diff --git a/src/mongo/db/service_context.cpp b/src/mongo/db/service_context.cpp index ffd81da4346..0de0541aa88 100644 --- a/src/mongo/db/service_context.cpp +++ b/src/mongo/db/service_context.cpp @@ -248,6 +248,9 @@ ServiceContext::UniqueOperationContext ServiceContext::makeOperationContext(Clie } }); + // We must prevent changing the storage engine while setting a new opCtx on the client. + auto sharedStorageChangeToken = _storageChangeLk.acquireSharedStorageChangeToken(); + onCreate(opCtx.get(), _clientObservers); ScopeGuard onCreateGuard([&] { onDestroy(opCtx.get(), _clientObservers); }); diff --git a/src/mongo/db/service_context.h b/src/mongo/db/service_context.h index a9210c9a622..44e12901cea 100644 --- a/src/mongo/db/service_context.h +++ b/src/mongo/db/service_context.h @@ -37,6 +37,7 @@ #include "mongo/db/logical_session_id.h" #include "mongo/db/operation_id.h" +#include "mongo/db/storage/storage_change_lock.h" #include "mongo/db/storage/storage_engine.h" #include "mongo/platform/atomic_word.h" #include "mongo/platform/mutex.h" @@ -408,6 +409,13 @@ public: _storageEngine = nullptr; } + /** + * Return the storage change lock. + */ + StorageChangeLock& getStorageChangeLock() { + return _storageChangeLk; + } + // // Global operation management. This may not belong here and there may be too many methods // here. @@ -698,6 +706,11 @@ private: SyncUnique<StorageEngine> _storageEngine; /** + * The lock that protects changing out the storage engine. + */ + StorageChangeLock _storageChangeLk; + + /** * Vector of registered observers. */ std::vector<ClientObserverHolder> _clientObservers; diff --git a/src/mongo/db/storage/SConscript b/src/mongo/db/storage/SConscript index 5def6309f4d..a30c02eaa45 100644 --- a/src/mongo/db/storage/SConscript +++ b/src/mongo/db/storage/SConscript @@ -292,6 +292,16 @@ env.Library( ) env.Library( + target='storage_change_lock', + source=[ + 'storage_change_lock.cpp' + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + ], +) + +env.Library( target='storage_engine_common', source=[ 'storage_engine_init.cpp', @@ -302,6 +312,7 @@ env.Library( LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/concurrency/lock_manager', '$BUILD_DIR/mongo/db/service_context', + 'storage_change_lock', 'storage_control', 'storage_engine_lock_file', 'storage_engine_metadata', diff --git a/src/mongo/db/storage/storage_change_lock.cpp b/src/mongo/db/storage/storage_change_lock.cpp new file mode 100644 index 00000000000..9e7c8c19f81 --- /dev/null +++ b/src/mongo/db/storage/storage_change_lock.cpp @@ -0,0 +1,86 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/storage/storage_change_lock.h" + +#include "mongo/util/time_support.h" + +namespace mongo { +/** + * SharedSpinLock routines. + * + * The spin lock's lock word is logically divided into a bit (kExclusiveLock) and an unsigned + * integer value for the rest of the word. The meanings are as follows: + * + * kExclusiveLock not set, rest of word 0: Lock not held nor waited on. + * + * kExclusiveLock not set, rest of word non-zero: Lock held in shared mode by the number of holders + * specified in the rest of the word. + * + * kExclusiveLock set, rest of word 0: Lock held in exclusive mode, no shared waiters. + * + * kExclusiveLock set, rest of word non-zero: Lock held in exclusive mode, number of waiters for the + * shared lock specified in the rest of the word. + * + * Note that if there are shared waiters when the exclusive lock is released, they will obtain the + * lock before another exclusive lock can be obtained. This should be considered an implementation + * detail and not a guarantee. + * + */ +void StorageChangeLock::SharedSpinLock::lock() { + uint32_t expected = 0; + while (!_lockWord.compareAndSwap(&expected, kExclusiveLock)) { + expected = 0; + mongo::sleepmillis(100); + } +} + +void StorageChangeLock::SharedSpinLock::unlock() { + uint32_t prevLockWord = _lockWord.fetchAndBitAnd(~kExclusiveLock); + invariant(prevLockWord & kExclusiveLock); +} + +void StorageChangeLock::SharedSpinLock::lock_shared() { + uint32_t prevLockWord = _lockWord.fetchAndAdd(1); + // If the shared part of the lock word was all-ones, we just overflowed it. This requires + // 2^31 threads creating an opCtx at once, which shouldn't happen. + invariant((prevLockWord & ~kExclusiveLock) != ~kExclusiveLock); + while (MONGO_unlikely(prevLockWord & kExclusiveLock)) { + mongo::sleepmillis(kLockPollIntervalMillis); + prevLockWord = _lockWord.load(); + } +} + +void StorageChangeLock::SharedSpinLock::unlock_shared() { + uint32_t prevLockWord = _lockWord.fetchAndSubtract(1); + invariant(!(prevLockWord & kExclusiveLock)); +} +} // namespace mongo diff --git a/src/mongo/db/storage/storage_change_lock.h b/src/mongo/db/storage/storage_change_lock.h new file mode 100644 index 00000000000..9663c643542 --- /dev/null +++ b/src/mongo/db/storage/storage_change_lock.h @@ -0,0 +1,81 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <shared_mutex> + +#include "mongo/platform/atomic_word.h" +#include "mongo/stdx/mutex.h" + +namespace mongo { +class StorageChangeLock { + /** + * Token to be held by caller while changing the storage engine on the context. + */ +private: + class SharedSpinLock; + +public: + using Token = stdx::unique_lock<SharedSpinLock>; + + /** + * Acquires the storage change lock in shared mode and returns an RAII lock object to it. + */ + auto acquireSharedStorageChangeToken() { + // TODO(SERVER-59157): Replace use of std::shared_lock with stdx::shared_lock or remove + // NOLINT according to resolution of this ticket. + return std::shared_lock(_storageChangeSpinlock); // NOLINT + } + + Token acquireExclusiveStorageChangeToken() { + return stdx::unique_lock(_storageChangeSpinlock); // NOLINT + } + +private: + // Spin lock for storage change. Needs to be fast for lock_shared and unlock_shared, + // not for the exclusive lock. This lock has no fairness guarantees and is not re-entrant + // from shared -> exclusive (i.e. it cannot be upgraded), exclusive -> shared, + // or exclusive -> exclusive. + class SharedSpinLock { + public: + void lock(); + void unlock(); + void lock_shared(); + void unlock_shared(); + + private: + AtomicWord<uint32_t> _lockWord; + static const uint32_t kExclusiveLock = 1 << 31; + static const int kLockPollIntervalMillis = 100; + }; + + SharedSpinLock _storageChangeSpinlock; +}; +} // namespace mongo diff --git a/src/mongo/db/storage/storage_engine_change_context.cpp b/src/mongo/db/storage/storage_engine_change_context.cpp index fea4d2b4b1f..1f9ba9a4cb1 100644 --- a/src/mongo/db/storage/storage_engine_change_context.cpp +++ b/src/mongo/db/storage/storage_engine_change_context.cpp @@ -37,7 +37,6 @@ #include "mongo/db/operation_context.h" #include "mongo/db/storage/recovery_unit_noop.h" #include "mongo/logv2/log.h" -#include "mongo/util/time_support.h" namespace mongo { namespace { @@ -87,11 +86,11 @@ void StorageEngineChangeOperationContextDoneNotifier::setNotifyWhenDone(ServiceC _service = service; } -StorageEngineChangeContext::StorageChangeToken -StorageEngineChangeContext::killOpsForStorageEngineChange(ServiceContext* service) { +StorageChangeLock::Token StorageEngineChangeContext::killOpsForStorageEngineChange( + ServiceContext* service) { invariant(this == StorageEngineChangeContext::get(service)); // Prevent new operations from being created. - stdx::unique_lock storageChangeLk(_storageChangeSpinlock); + auto storageChangeLk = service->getStorageChangeLock().acquireExclusiveStorageChangeToken(); stdx::unique_lock lk(_mutex); { ServiceContext::LockedClientsCursor clientCursor(service); @@ -127,7 +126,7 @@ StorageEngineChangeContext::killOpsForStorageEngineChange(ServiceContext* servic } void StorageEngineChangeContext::changeStorageEngine(ServiceContext* service, - StorageChangeToken token, + StorageChangeLock::Token token, std::unique_ptr<StorageEngine> engine) { invariant(this == StorageEngineChangeContext::get(service)); service->setStorageEngine(std::move(engine)); @@ -145,55 +144,4 @@ void StorageEngineChangeContext::notifyOpCtxDestroyed() noexcept { if (_numOpCtxtsToWaitFor == 0) _allOldStorageOperationContextsReleased.notify_one(); } - -/** - * SharedSpinLock routines. - * - * The spin lock's lock word is logically divided into a bit (kExclusiveLock) and an unsigned - * integer value for the rest of the word. The meanings are as follows: - * - * kExclusiveLock not set, rest of word 0: Lock not held nor waited on. - * - * kExclusiveLock not set, rest of word non-zero: Lock held in shared mode by the number of holders - * specified in the rest of the word. - * - * kExclusiveLock set, rest of word 0: Lock held in exclusive mode, no shared waiters. - * - * kExclusiveLock set, rest of word non-zero: Lock held in exclusive mode, number of waiters for the - * shared lock specified in the rest of the word. - * - * Note that if there are shared waiters when the exclusive lock is released, they will obtain the - * lock before another exclusive lock can be obtained. This should be considered an implementation - * detail and not a guarantee. - * - */ -void StorageEngineChangeContext::SharedSpinLock::lock() { - uint32_t expected = 0; - while (!_lockWord.compareAndSwap(&expected, kExclusiveLock)) { - expected = 0; - mongo::sleepmillis(100); - } -} - -void StorageEngineChangeContext::SharedSpinLock::unlock() { - uint32_t prevLockWord = _lockWord.fetchAndBitAnd(~kExclusiveLock); - invariant(prevLockWord & kExclusiveLock); -} - -void StorageEngineChangeContext::SharedSpinLock::lock_shared() { - uint32_t prevLockWord = _lockWord.fetchAndAdd(1); - // If the shared part of the lock word was all-ones, we just overflowed it. This requires - // 2^31 threads creating an opCtx at once, which shouldn't happen. - invariant((prevLockWord & ~kExclusiveLock) != ~kExclusiveLock); - while (MONGO_unlikely(prevLockWord & kExclusiveLock)) { - mongo::sleepmillis(kLockPollIntervalMillis); - prevLockWord = _lockWord.load(); - } -} - -void StorageEngineChangeContext::SharedSpinLock::unlock_shared() { - uint32_t prevLockWord = _lockWord.fetchAndSubtract(1); - invariant(!(prevLockWord & kExclusiveLock)); -} - } // namespace mongo diff --git a/src/mongo/db/storage/storage_engine_change_context.h b/src/mongo/db/storage/storage_engine_change_context.h index 8994be6799d..d758044db74 100644 --- a/src/mongo/db/storage/storage_engine_change_context.h +++ b/src/mongo/db/storage/storage_engine_change_context.h @@ -33,8 +33,8 @@ #include "mongo/db/operation_id.h" #include "mongo/db/service_context.h" +#include "mongo/db/storage/storage_change_lock.h" #include "mongo/db/storage/storage_engine.h" -#include "mongo/platform/atomic_word.h" #include "mongo/stdx/condition_variable.h" #include "mongo/stdx/unordered_set.h" @@ -45,71 +45,33 @@ public: static StorageEngineChangeContext* get(ServiceContext* service); /** - * Token to be held by caller while changing the storage engine on the context. - */ -private: - class SharedSpinLock; - -public: - using StorageChangeToken = stdx::unique_lock<SharedSpinLock>; - // TODO(SERVER-59157): Replace two uses of std::shared_lock with stdx::shared_lock or remove - // NOLINT according to resolution of this ticket. - using SharedStorageChangeToken = std::shared_lock<SharedSpinLock>; // NOLINT - - /** * Start to change the storage engine for the associated ServiceContext. This will kill all * OperationContexts that have a non-noop Recovery Unit with an InterruptedDueToStorageChange * code, free the existing storage engine, and block any new operation contexts from being * created while the returned StorageChangeToken is in scope. */ - StorageChangeToken killOpsForStorageEngineChange(ServiceContext* service); + StorageChangeLock::Token killOpsForStorageEngineChange(ServiceContext* service); /** * Finish changing the storage engine for the associated ServiceContext. This will change the * storage engine and allow operation contexts to again be created. */ void changeStorageEngine(ServiceContext* service, - StorageChangeToken token, + StorageChangeLock::Token token, std::unique_ptr<StorageEngine> engine); /** - * Acquires the storage change lock in shared mode and returns an RAII lock object to it. - */ - SharedStorageChangeToken acquireSharedStorageChangeToken() { - return std::shared_lock(_storageChangeSpinlock); // NOLINT - } - - /** * Called by the decorator's destructor to tell us that an opCtx with the old storage engine has * been destroyed. */ void notifyOpCtxDestroyed() noexcept; private: - // Spin lock for storage change. Needs to be fast for lock_shared and unlock_shared, - // not for the exclusive lock. This lock has no fairness guarantees and is not re-entrant - // from shared -> exclusive (i.e. it cannot be upgraded), exclusive -> shared, - // or exclusive -> exclusive. - class SharedSpinLock { - public: - void lock(); - void unlock(); - void lock_shared(); - void unlock_shared(); - - private: - AtomicWord<uint32_t> _lockWord; - static const uint32_t kExclusiveLock = 1 << 31; - static const int kLockPollIntervalMillis = 100; - }; - Mutex _mutex = MONGO_MAKE_LATCH("StorageEngineChangeContext::_mutex"); // Keeps track of opCtxs associated with a storage engine that is being replaced. // Protected by _mutex int _numOpCtxtsToWaitFor = 0; stdx::condition_variable _allOldStorageOperationContextsReleased; - - SharedSpinLock _storageChangeSpinlock; }; } // namespace mongo diff --git a/src/mongo/db/storage/storage_engine_init.cpp b/src/mongo/db/storage/storage_engine_init.cpp index 911a5248db4..f65ca6231cf 100644 --- a/src/mongo/db/storage/storage_engine_init.cpp +++ b/src/mongo/db/storage/storage_engine_init.cpp @@ -357,9 +357,6 @@ public: opCtx->setLockState(std::make_unique<LockerImpl>()); auto service = opCtx->getServiceContext(); - // We must prevent storage engine changes while setting the recovery unit on the opCtx. - auto sharedStorageChangeToken = - StorageEngineChangeContext::get(service)->acquireSharedStorageChangeToken(); // There are a few cases where we don't have a storage engine available yet when creating an // operation context. |