summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSamy Lanka <samy.lanka@mongodb.com>2022-01-10 16:59:08 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-01-10 22:30:03 +0000
commitfe5da9a24783d5e3cf6e0f0f3dc2da7086c24ebd (patch)
tree766152b00bb7da589bd37c7974d659e758784e5f
parentba751252858213dd59b7d4c0b0d62f667e39d230 (diff)
downloadmongo-fe5da9a24783d5e3cf6e0f0f3dc2da7086c24ebd.tar.gz
SERVER-61934 Hold storage change lock while creating a new opCtx to prevent races with switching out the storage engine
(cherry picked from commit 9b60d431ed30814490c625c451b5c0e66a28d928)
-rw-r--r--src/mongo/db/SConscript1
-rw-r--r--src/mongo/db/service_context.cpp3
-rw-r--r--src/mongo/db/service_context.h13
-rw-r--r--src/mongo/db/storage/SConscript11
-rw-r--r--src/mongo/db/storage/storage_change_lock.cpp86
-rw-r--r--src/mongo/db/storage/storage_change_lock.h81
-rw-r--r--src/mongo/db/storage/storage_engine_change_context.cpp60
-rw-r--r--src/mongo/db/storage/storage_engine_change_context.h44
-rw-r--r--src/mongo/db/storage/storage_engine_init.cpp3
9 files changed, 202 insertions, 100 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index 91bedb4cf6c..f12b1900b68 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -512,6 +512,7 @@ env.Library(
'write_concern_options',
],
LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/db/storage/storage_change_lock',
'$BUILD_DIR/mongo/util/processinfo',
],
)
diff --git a/src/mongo/db/service_context.cpp b/src/mongo/db/service_context.cpp
index ffd81da4346..0de0541aa88 100644
--- a/src/mongo/db/service_context.cpp
+++ b/src/mongo/db/service_context.cpp
@@ -248,6 +248,9 @@ ServiceContext::UniqueOperationContext ServiceContext::makeOperationContext(Clie
}
});
+ // We must prevent changing the storage engine while setting a new opCtx on the client.
+ auto sharedStorageChangeToken = _storageChangeLk.acquireSharedStorageChangeToken();
+
onCreate(opCtx.get(), _clientObservers);
ScopeGuard onCreateGuard([&] { onDestroy(opCtx.get(), _clientObservers); });
diff --git a/src/mongo/db/service_context.h b/src/mongo/db/service_context.h
index a9210c9a622..44e12901cea 100644
--- a/src/mongo/db/service_context.h
+++ b/src/mongo/db/service_context.h
@@ -37,6 +37,7 @@
#include "mongo/db/logical_session_id.h"
#include "mongo/db/operation_id.h"
+#include "mongo/db/storage/storage_change_lock.h"
#include "mongo/db/storage/storage_engine.h"
#include "mongo/platform/atomic_word.h"
#include "mongo/platform/mutex.h"
@@ -408,6 +409,13 @@ public:
_storageEngine = nullptr;
}
+ /**
+ * Return the storage change lock.
+ */
+ StorageChangeLock& getStorageChangeLock() {
+ return _storageChangeLk;
+ }
+
//
// Global operation management. This may not belong here and there may be too many methods
// here.
@@ -698,6 +706,11 @@ private:
SyncUnique<StorageEngine> _storageEngine;
/**
+ * The lock that protects changing out the storage engine.
+ */
+ StorageChangeLock _storageChangeLk;
+
+ /**
* Vector of registered observers.
*/
std::vector<ClientObserverHolder> _clientObservers;
diff --git a/src/mongo/db/storage/SConscript b/src/mongo/db/storage/SConscript
index 5def6309f4d..a30c02eaa45 100644
--- a/src/mongo/db/storage/SConscript
+++ b/src/mongo/db/storage/SConscript
@@ -292,6 +292,16 @@ env.Library(
)
env.Library(
+ target='storage_change_lock',
+ source=[
+ 'storage_change_lock.cpp'
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/base',
+ ],
+)
+
+env.Library(
target='storage_engine_common',
source=[
'storage_engine_init.cpp',
@@ -302,6 +312,7 @@ env.Library(
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/concurrency/lock_manager',
'$BUILD_DIR/mongo/db/service_context',
+ 'storage_change_lock',
'storage_control',
'storage_engine_lock_file',
'storage_engine_metadata',
diff --git a/src/mongo/db/storage/storage_change_lock.cpp b/src/mongo/db/storage/storage_change_lock.cpp
new file mode 100644
index 00000000000..9e7c8c19f81
--- /dev/null
+++ b/src/mongo/db/storage/storage_change_lock.cpp
@@ -0,0 +1,86 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/storage/storage_change_lock.h"
+
+#include "mongo/util/time_support.h"
+
+namespace mongo {
+/**
+ * SharedSpinLock routines.
+ *
+ * The spin lock's lock word is logically divided into a bit (kExclusiveLock) and an unsigned
+ * integer value for the rest of the word. The meanings are as follows:
+ *
+ * kExclusiveLock not set, rest of word 0: Lock not held nor waited on.
+ *
+ * kExclusiveLock not set, rest of word non-zero: Lock held in shared mode by the number of holders
+ * specified in the rest of the word.
+ *
+ * kExclusiveLock set, rest of word 0: Lock held in exclusive mode, no shared waiters.
+ *
+ * kExclusiveLock set, rest of word non-zero: Lock held in exclusive mode, number of waiters for the
+ * shared lock specified in the rest of the word.
+ *
+ * Note that if there are shared waiters when the exclusive lock is released, they will obtain the
+ * lock before another exclusive lock can be obtained. This should be considered an implementation
+ * detail and not a guarantee.
+ *
+ */
+void StorageChangeLock::SharedSpinLock::lock() {
+ uint32_t expected = 0;
+ while (!_lockWord.compareAndSwap(&expected, kExclusiveLock)) {
+ expected = 0;
+ mongo::sleepmillis(100);
+ }
+}
+
+void StorageChangeLock::SharedSpinLock::unlock() {
+ uint32_t prevLockWord = _lockWord.fetchAndBitAnd(~kExclusiveLock);
+ invariant(prevLockWord & kExclusiveLock);
+}
+
+void StorageChangeLock::SharedSpinLock::lock_shared() {
+ uint32_t prevLockWord = _lockWord.fetchAndAdd(1);
+ // If the shared part of the lock word was all-ones, we just overflowed it. This requires
+ // 2^31 threads creating an opCtx at once, which shouldn't happen.
+ invariant((prevLockWord & ~kExclusiveLock) != ~kExclusiveLock);
+ while (MONGO_unlikely(prevLockWord & kExclusiveLock)) {
+ mongo::sleepmillis(kLockPollIntervalMillis);
+ prevLockWord = _lockWord.load();
+ }
+}
+
+void StorageChangeLock::SharedSpinLock::unlock_shared() {
+ uint32_t prevLockWord = _lockWord.fetchAndSubtract(1);
+ invariant(!(prevLockWord & kExclusiveLock));
+}
+} // namespace mongo
diff --git a/src/mongo/db/storage/storage_change_lock.h b/src/mongo/db/storage/storage_change_lock.h
new file mode 100644
index 00000000000..9663c643542
--- /dev/null
+++ b/src/mongo/db/storage/storage_change_lock.h
@@ -0,0 +1,81 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <shared_mutex>
+
+#include "mongo/platform/atomic_word.h"
+#include "mongo/stdx/mutex.h"
+
+namespace mongo {
+class StorageChangeLock {
+ /**
+ * Token to be held by caller while changing the storage engine on the context.
+ */
+private:
+ class SharedSpinLock;
+
+public:
+ using Token = stdx::unique_lock<SharedSpinLock>;
+
+ /**
+ * Acquires the storage change lock in shared mode and returns an RAII lock object to it.
+ */
+ auto acquireSharedStorageChangeToken() {
+ // TODO(SERVER-59157): Replace use of std::shared_lock with stdx::shared_lock or remove
+ // NOLINT according to resolution of this ticket.
+ return std::shared_lock(_storageChangeSpinlock); // NOLINT
+ }
+
+ Token acquireExclusiveStorageChangeToken() {
+ return stdx::unique_lock(_storageChangeSpinlock); // NOLINT
+ }
+
+private:
+ // Spin lock for storage change. Needs to be fast for lock_shared and unlock_shared,
+ // not for the exclusive lock. This lock has no fairness guarantees and is not re-entrant
+ // from shared -> exclusive (i.e. it cannot be upgraded), exclusive -> shared,
+ // or exclusive -> exclusive.
+ class SharedSpinLock {
+ public:
+ void lock();
+ void unlock();
+ void lock_shared();
+ void unlock_shared();
+
+ private:
+ AtomicWord<uint32_t> _lockWord;
+ static const uint32_t kExclusiveLock = 1 << 31;
+ static const int kLockPollIntervalMillis = 100;
+ };
+
+ SharedSpinLock _storageChangeSpinlock;
+};
+} // namespace mongo
diff --git a/src/mongo/db/storage/storage_engine_change_context.cpp b/src/mongo/db/storage/storage_engine_change_context.cpp
index fea4d2b4b1f..1f9ba9a4cb1 100644
--- a/src/mongo/db/storage/storage_engine_change_context.cpp
+++ b/src/mongo/db/storage/storage_engine_change_context.cpp
@@ -37,7 +37,6 @@
#include "mongo/db/operation_context.h"
#include "mongo/db/storage/recovery_unit_noop.h"
#include "mongo/logv2/log.h"
-#include "mongo/util/time_support.h"
namespace mongo {
namespace {
@@ -87,11 +86,11 @@ void StorageEngineChangeOperationContextDoneNotifier::setNotifyWhenDone(ServiceC
_service = service;
}
-StorageEngineChangeContext::StorageChangeToken
-StorageEngineChangeContext::killOpsForStorageEngineChange(ServiceContext* service) {
+StorageChangeLock::Token StorageEngineChangeContext::killOpsForStorageEngineChange(
+ ServiceContext* service) {
invariant(this == StorageEngineChangeContext::get(service));
// Prevent new operations from being created.
- stdx::unique_lock storageChangeLk(_storageChangeSpinlock);
+ auto storageChangeLk = service->getStorageChangeLock().acquireExclusiveStorageChangeToken();
stdx::unique_lock lk(_mutex);
{
ServiceContext::LockedClientsCursor clientCursor(service);
@@ -127,7 +126,7 @@ StorageEngineChangeContext::killOpsForStorageEngineChange(ServiceContext* servic
}
void StorageEngineChangeContext::changeStorageEngine(ServiceContext* service,
- StorageChangeToken token,
+ StorageChangeLock::Token token,
std::unique_ptr<StorageEngine> engine) {
invariant(this == StorageEngineChangeContext::get(service));
service->setStorageEngine(std::move(engine));
@@ -145,55 +144,4 @@ void StorageEngineChangeContext::notifyOpCtxDestroyed() noexcept {
if (_numOpCtxtsToWaitFor == 0)
_allOldStorageOperationContextsReleased.notify_one();
}
-
-/**
- * SharedSpinLock routines.
- *
- * The spin lock's lock word is logically divided into a bit (kExclusiveLock) and an unsigned
- * integer value for the rest of the word. The meanings are as follows:
- *
- * kExclusiveLock not set, rest of word 0: Lock not held nor waited on.
- *
- * kExclusiveLock not set, rest of word non-zero: Lock held in shared mode by the number of holders
- * specified in the rest of the word.
- *
- * kExclusiveLock set, rest of word 0: Lock held in exclusive mode, no shared waiters.
- *
- * kExclusiveLock set, rest of word non-zero: Lock held in exclusive mode, number of waiters for the
- * shared lock specified in the rest of the word.
- *
- * Note that if there are shared waiters when the exclusive lock is released, they will obtain the
- * lock before another exclusive lock can be obtained. This should be considered an implementation
- * detail and not a guarantee.
- *
- */
-void StorageEngineChangeContext::SharedSpinLock::lock() {
- uint32_t expected = 0;
- while (!_lockWord.compareAndSwap(&expected, kExclusiveLock)) {
- expected = 0;
- mongo::sleepmillis(100);
- }
-}
-
-void StorageEngineChangeContext::SharedSpinLock::unlock() {
- uint32_t prevLockWord = _lockWord.fetchAndBitAnd(~kExclusiveLock);
- invariant(prevLockWord & kExclusiveLock);
-}
-
-void StorageEngineChangeContext::SharedSpinLock::lock_shared() {
- uint32_t prevLockWord = _lockWord.fetchAndAdd(1);
- // If the shared part of the lock word was all-ones, we just overflowed it. This requires
- // 2^31 threads creating an opCtx at once, which shouldn't happen.
- invariant((prevLockWord & ~kExclusiveLock) != ~kExclusiveLock);
- while (MONGO_unlikely(prevLockWord & kExclusiveLock)) {
- mongo::sleepmillis(kLockPollIntervalMillis);
- prevLockWord = _lockWord.load();
- }
-}
-
-void StorageEngineChangeContext::SharedSpinLock::unlock_shared() {
- uint32_t prevLockWord = _lockWord.fetchAndSubtract(1);
- invariant(!(prevLockWord & kExclusiveLock));
-}
-
} // namespace mongo
diff --git a/src/mongo/db/storage/storage_engine_change_context.h b/src/mongo/db/storage/storage_engine_change_context.h
index 8994be6799d..d758044db74 100644
--- a/src/mongo/db/storage/storage_engine_change_context.h
+++ b/src/mongo/db/storage/storage_engine_change_context.h
@@ -33,8 +33,8 @@
#include "mongo/db/operation_id.h"
#include "mongo/db/service_context.h"
+#include "mongo/db/storage/storage_change_lock.h"
#include "mongo/db/storage/storage_engine.h"
-#include "mongo/platform/atomic_word.h"
#include "mongo/stdx/condition_variable.h"
#include "mongo/stdx/unordered_set.h"
@@ -45,71 +45,33 @@ public:
static StorageEngineChangeContext* get(ServiceContext* service);
/**
- * Token to be held by caller while changing the storage engine on the context.
- */
-private:
- class SharedSpinLock;
-
-public:
- using StorageChangeToken = stdx::unique_lock<SharedSpinLock>;
- // TODO(SERVER-59157): Replace two uses of std::shared_lock with stdx::shared_lock or remove
- // NOLINT according to resolution of this ticket.
- using SharedStorageChangeToken = std::shared_lock<SharedSpinLock>; // NOLINT
-
- /**
* Start to change the storage engine for the associated ServiceContext. This will kill all
* OperationContexts that have a non-noop Recovery Unit with an InterruptedDueToStorageChange
* code, free the existing storage engine, and block any new operation contexts from being
* created while the returned StorageChangeToken is in scope.
*/
- StorageChangeToken killOpsForStorageEngineChange(ServiceContext* service);
+ StorageChangeLock::Token killOpsForStorageEngineChange(ServiceContext* service);
/**
* Finish changing the storage engine for the associated ServiceContext. This will change the
* storage engine and allow operation contexts to again be created.
*/
void changeStorageEngine(ServiceContext* service,
- StorageChangeToken token,
+ StorageChangeLock::Token token,
std::unique_ptr<StorageEngine> engine);
/**
- * Acquires the storage change lock in shared mode and returns an RAII lock object to it.
- */
- SharedStorageChangeToken acquireSharedStorageChangeToken() {
- return std::shared_lock(_storageChangeSpinlock); // NOLINT
- }
-
- /**
* Called by the decorator's destructor to tell us that an opCtx with the old storage engine has
* been destroyed.
*/
void notifyOpCtxDestroyed() noexcept;
private:
- // Spin lock for storage change. Needs to be fast for lock_shared and unlock_shared,
- // not for the exclusive lock. This lock has no fairness guarantees and is not re-entrant
- // from shared -> exclusive (i.e. it cannot be upgraded), exclusive -> shared,
- // or exclusive -> exclusive.
- class SharedSpinLock {
- public:
- void lock();
- void unlock();
- void lock_shared();
- void unlock_shared();
-
- private:
- AtomicWord<uint32_t> _lockWord;
- static const uint32_t kExclusiveLock = 1 << 31;
- static const int kLockPollIntervalMillis = 100;
- };
-
Mutex _mutex = MONGO_MAKE_LATCH("StorageEngineChangeContext::_mutex");
// Keeps track of opCtxs associated with a storage engine that is being replaced.
// Protected by _mutex
int _numOpCtxtsToWaitFor = 0;
stdx::condition_variable _allOldStorageOperationContextsReleased;
-
- SharedSpinLock _storageChangeSpinlock;
};
} // namespace mongo
diff --git a/src/mongo/db/storage/storage_engine_init.cpp b/src/mongo/db/storage/storage_engine_init.cpp
index 911a5248db4..f65ca6231cf 100644
--- a/src/mongo/db/storage/storage_engine_init.cpp
+++ b/src/mongo/db/storage/storage_engine_init.cpp
@@ -357,9 +357,6 @@ public:
opCtx->setLockState(std::make_unique<LockerImpl>());
auto service = opCtx->getServiceContext();
- // We must prevent storage engine changes while setting the recovery unit on the opCtx.
- auto sharedStorageChangeToken =
- StorageEngineChangeContext::get(service)->acquireSharedStorageChangeToken();
// There are a few cases where we don't have a storage engine available yet when creating an
// operation context.