summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Milkie <milkie@10gen.com>2019-06-05 15:18:19 -0400
committerEric Milkie <milkie@10gen.com>2019-06-18 13:19:38 -0400
commitbeaa372035ef597e77db2220bb624028417d9ce5 (patch)
tree53c4b25877c81fa7910aaea309b3220d8689e6d3
parent0382a804a18aa78690a4d4a247da579d4a12f352 (diff)
downloadmongo-beaa372035ef597e77db2220bb624028417d9ce5.tar.gz
SERVER-41462 unlock RSTL for index builds on secondaries
This is safe to do because index builds started on secondaries do not need to synchronize with step up or step down via the RSTL.
-rw-r--r--src/mongo/db/catalog/multi_index_block.cpp3
-rw-r--r--src/mongo/db/index/index_access_method.cpp27
-rw-r--r--src/mongo/db/index_builds_coordinator.cpp22
-rw-r--r--src/mongo/db/index_builds_coordinator.h3
-rw-r--r--src/mongo/db/index_builds_coordinator_mongod.cpp2
5 files changed, 44 insertions, 13 deletions
diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp
index 65c6ed1d142..1b4c9cc33c6 100644
--- a/src/mongo/db/catalog/multi_index_block.cpp
+++ b/src/mongo/db/catalog/multi_index_block.cpp
@@ -122,6 +122,9 @@ void MultiIndexBlock::cleanUpAfterBuild(OperationContext* opCtx, Collection* col
// instead write a noop entry. A foreground `applyOps` index build may have a commit
// timestamp already set.
if (opCtx->recoveryUnit()->getCommitTimestamp().isNull() &&
+ // We need to avoid checking replication state if we do not hold the RSTL. If we do
+ // not hold the RSTL, we must be a build started on a secondary via replication.
+ opCtx->lockState()->isRSTLLocked() &&
replCoord->canAcceptWritesForDatabase(opCtx, "admin")) {
opCtx->getServiceContext()->getOpObserver()->onOpMessage(
opCtx,
diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp
index 4fd9a9264bc..fc415013018 100644
--- a/src/mongo/db/index/index_access_method.cpp
+++ b/src/mongo/db/index/index_access_method.cpp
@@ -145,17 +145,22 @@ bool AbstractIndexAccessMethod::ignoreKeyTooLong() {
// TODO SERVER-36385: Remove this when there is no KeyTooLong error.
bool AbstractIndexAccessMethod::shouldCheckIndexKeySize(OperationContext* opCtx) {
- // Don't check index key size if we cannot write to the collection. That indicates we are a
- // secondary node and we should accept any index key.
- const auto shouldRelaxConstraints =
- repl::ReplicationCoordinator::get(opCtx)->shouldRelaxIndexConstraints(opCtx,
- _btreeState->ns());
-
- // Don't check index key size if FCV hasn't been initialized.
- return !shouldRelaxConstraints &&
- serverGlobalParams.featureCompatibility.isVersionInitialized() &&
+ // The index key size ought to be checked for nodes in FCV 4.0. However, it is possible for an
+ // index build to have begun on a secondary while in FCV 4.2 and then have FCV drop to 4.0. For
+ // those builds, we should NOT check the index key length; instead of crashing, we are going to
+ // allow them.
+
+ // The check for RSTL confirms that we are a primary, or a secondary that started a build while
+ // in FCV 4.0. In FCV 4.2, secondary index builds unlock the RSTL, and thus are not allowed to
+ // call shouldRelaxIndexConstraints(), since that function checks replication state and that is
+ // not allowed unless you hold the RSTL.
+ return serverGlobalParams.featureCompatibility.isVersionInitialized() &&
serverGlobalParams.featureCompatibility.getVersion() ==
- ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo40;
+ ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo40 &&
+ opCtx->lockState()->isRSTLLocked() &&
+ !repl::ReplicationCoordinator::get(opCtx)->shouldRelaxIndexConstraints(opCtx,
+ _btreeState->ns());
+ ;
}
bool AbstractIndexAccessMethod::isFatalError(OperationContext* opCtx, Status status, BSONObj key) {
@@ -647,6 +652,8 @@ Status AbstractIndexAccessMethod::commitBulk(OperationContext* opCtx,
auto builder = std::unique_ptr<SortedDataBuilderInterface>(
_newInterface->getBulkBuilder(opCtx, dupsAllowed));
+ // We need to check the index key size possibly on a primary-started index build; never on a
+ // replicated index build.
bool checkIndexKeySize = shouldCheckIndexKeySize(opCtx);
BSONObj previousKey;
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 70b087f4dde..9b077a93011 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -258,8 +258,9 @@ Future<void> IndexBuildsCoordinator::joinIndexBuilds(const NamespaceString& nss,
return std::move(pf.future);
}
-void IndexBuildsCoordinator::interruptAllIndexBuilds(const std::string& reason) {
+void IndexBuildsCoordinator::interruptAllIndexBuildsForShutdown(const std::string& reason) {
stdx::unique_lock<stdx::mutex> lk(_mutex);
+ _shuttingDown = true;
// Signal all the index builds to stop.
for (auto& buildStateIt : _allIndexBuilds) {
@@ -783,6 +784,20 @@ void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx,
// OperationContext::checkForInterrupt() will see the kill status and respond
// accordingly (checkForInterrupt() will throw an exception while
// checkForInterruptNoAssert() returns an error Status).
+
+ // We need to drop the RSTL here, as we do not need synchronization with step up and
+ // step down. Dropping the RSTL is important because otherwise if we held the RSTL it
+ // would create deadlocks with prepared transactions on step up and step down. A
+ // deadlock could result if the index build was attempting to acquire a Collection S or
+ // X lock while a prepared transaction held a Collection IX lock, and a step down was
+ // waiting to acquire the RSTL in mode X.
+ // We should only drop the RSTL while in FCV 4.2, as prepared transactions can only
+ // occur in FCV 4.2.
+ if (serverGlobalParams.featureCompatibility.getVersion() ==
+ ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo42) {
+ const bool unlocked = opCtx->lockState()->unlockRSTLforPrepare();
+ invariant(unlocked);
+ }
opCtx->runWithoutInterruptionExceptAtGlobalShutdown(
[&, this] { _buildIndex(opCtx, collection, *nss, replState, &collLock); });
} else {
@@ -823,6 +838,11 @@ void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx,
// Failed index builds should abort secondary oplog application.
if (replSetAndNotPrimary) {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ if (_shuttingDown) {
+ // Allow shutdown with success exit status, despite interrupted index builds.
+ return;
+ }
fassert(51101,
status.withContext(str::stream() << "Index build: " << replState->buildUUID
<< "; Database: "
diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h
index 0b6978be325..ff4933cdbbc 100644
--- a/src/mongo/db/index_builds_coordinator.h
+++ b/src/mongo/db/index_builds_coordinator.h
@@ -151,7 +151,7 @@ public:
* i.e. when the server is not accepting user requests and no internal operations are
* concurrently starting new index builds.
*/
- void interruptAllIndexBuilds(const std::string& reason);
+ void interruptAllIndexBuildsForShutdown(const std::string& reason);
/**
* Signals all of the index builds on the specified collection to abort and then waits until the
@@ -450,6 +450,7 @@ protected:
IndexBuildsManager _indexBuildsManager;
bool _sleepForTest = false;
+ bool _shuttingDown = false;
};
/**
diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp
index 4e9cca08386..b0bea23afa9 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.cpp
+++ b/src/mongo/db/index_builds_coordinator_mongod.cpp
@@ -78,7 +78,7 @@ void IndexBuildsCoordinatorMongod::shutdown() {
_threadPool.shutdown();
// Signal active builds to stop and wait for them to stop.
- interruptAllIndexBuilds("Index build interrupted due to shutdown.");
+ interruptAllIndexBuildsForShutdown("Index build interrupted due to shutdown.");
// Wait for active threads to finish.
_threadPool.join();