summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPierlauro Sciarelli <pierlauro.sciarelli@mongodb.com>2020-11-24 17:37:54 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-11-24 18:56:26 +0000
commit045dd7a5857aa1b34d6fd1d2f6e3e29900010749 (patch)
tree656e709aa435098c6276725eefbe7554cb1007e5
parent40c3bac93c2091cea665de686cae93ef90429775 (diff)
downloadmongo-045dd7a5857aa1b34d6fd1d2f6e3e29900010749.tar.gz
SERVER-51903 Avoid VectorClockPersistCommand deadlock upon fast replica state transitions
-rw-r--r--src/mongo/db/s/SConscript1
-rw-r--r--src/mongo/db/s/vector_clock_persist_command.cpp90
-rw-r--r--src/mongo/db/vector_clock_mongod.cpp83
3 files changed, 23 insertions, 151 deletions
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index 76a66095f9c..373be39e030 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -341,7 +341,6 @@ env.Library(
'split_vector_command.cpp',
'txn_two_phase_commit_cmds.cpp',
'unset_sharding_command.cpp',
- 'vector_clock_persist_command.cpp',
'wait_for_ongoing_chunk_splits_command.cpp',
],
LIBDEPS_PRIVATE=[
diff --git a/src/mongo/db/s/vector_clock_persist_command.cpp b/src/mongo/db/s/vector_clock_persist_command.cpp
deleted file mode 100644
index 9df463430c4..00000000000
--- a/src/mongo/db/s/vector_clock_persist_command.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- * Copyright (C) 2020-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kCommand
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/auth/action_type.h"
-#include "mongo/db/auth/authorization_session.h"
-#include "mongo/db/commands.h"
-#include "mongo/db/s/sharding_state.h"
-#include "mongo/db/vector_clock_mutable.h"
-
-namespace mongo {
-namespace {
-
-/**
- * Internal sharding command run on shard primaries to persist the vector clock state.
- */
-class VectorClockPersistCommand : public BasicCommand {
-public:
- VectorClockPersistCommand() : BasicCommand("_vectorClockPersist") {}
-
- Status checkAuthForCommand(Client* client,
- const std::string& dbname,
- const BSONObj& cmdObj) const override {
- uassert(ErrorCodes::Unauthorized,
- "Unauthorized",
- AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
- ResourcePattern::forDatabaseName(
- NamespaceString::kVectorClockNamespace.db().toString()),
- ActionType::internal));
- return Status::OK();
- }
-
- AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
- return AllowedOnSecondary::kNever;
- }
-
- bool supportsWriteConcern(const BSONObj& cmd) const override {
- return true;
- }
-
- std::string help() const override {
- return "Internal sharding command run on shard primaries to persist the vector clock "
- "state.";
- }
-
- bool run(OperationContext* opCtx,
- const std::string& dbname_unused,
- const BSONObj& cmdObj,
- BSONObjBuilder& result) override {
- auto const shardingState = ShardingState::get(opCtx);
- uassertStatusOK(shardingState->canAcceptShardedCommands());
-
- VectorClockMutable::get(opCtx)->waitForDurable().get(opCtx);
-
- return true;
- }
-
-} vectorClockPersistCmd;
-
-} // namespace
-} // namespace mongo
diff --git a/src/mongo/db/vector_clock_mongod.cpp b/src/mongo/db/vector_clock_mongod.cpp
index c369a49382a..deb1be5eeb5 100644
--- a/src/mongo/db/vector_clock_mongod.cpp
+++ b/src/mongo/db/vector_clock_mongod.cpp
@@ -34,7 +34,6 @@
#include "mongo/db/logical_time_validator.h"
#include "mongo/db/persistent_task_store.h"
#include "mongo/db/repl/replica_set_aware_service.h"
-#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/db/vector_clock_document_gen.h"
#include "mongo/db/vector_clock_mutable.h"
@@ -128,9 +127,6 @@ private:
// Protects the shared state below
Mutex _mutex = MONGO_MAKE_LATCH("VectorClockMongoD::_mutex");
- // This value is incremented every time the node changes its role between primary and secondary
- uint64_t _generation{0};
-
// If set to true, means that another operation already scheduled the `_queue` draining loop, if
// false it means that this operation must do it
bool _loopScheduled{false};
@@ -173,13 +169,11 @@ VectorClockMongoD::~VectorClockMongoD() = default;
void VectorClockMongoD::onStepUpBegin(OperationContext* opCtx, long long term) {
stdx::lock_guard lg(_mutex);
- ++_generation;
_durableTime.reset();
}
void VectorClockMongoD::onStepDown() {
stdx::lock_guard lg(_mutex);
- ++_generation;
_durableTime.reset();
}
@@ -251,15 +245,11 @@ SharedSemiFuture<void> VectorClockMongoD::_enqueueWaiterAndScheduleLoopIfNeeded(
}
Future<void> VectorClockMongoD::_doWhileQueueNotEmptyOrError(ServiceContext* service) {
- auto [p, f] = makePromiseFuture<std::pair<uint64_t, VectorTime>>();
+ auto [p, f] = makePromiseFuture<VectorTime>();
auto future = std::move(f)
- .then([this](std::pair<uint64_t, VectorTime> newDurableTime) {
+ .then([this](VectorTime newDurableTime) {
stdx::unique_lock ul(_mutex);
- uassert(ErrorCodes::InterruptedDueToReplStateChange,
- "VectorClock failed to persist due to stepdown",
- newDurableTime.first == _generation);
-
- _durableTime.emplace(newDurableTime.second);
+ _durableTime.emplace(newDurableTime);
ComparableVectorTime time{*_durableTime};
@@ -274,9 +264,9 @@ Future<void> VectorClockMongoD::_doWhileQueueNotEmptyOrError(ServiceContext* ser
// Make sure the VectorClock advances at least up to the just recovered
// durable time
- _advanceTime({newDurableTime.second.clusterTime(),
- newDurableTime.second.configTime(),
- newDurableTime.second.topologyTime()});
+ _advanceTime({newDurableTime.clusterTime(),
+ newDurableTime.configTime(),
+ newDurableTime.topologyTime()});
for (auto& p : promises)
p->emplaceValue();
@@ -307,9 +297,9 @@ Future<void> VectorClockMongoD::_doWhileQueueNotEmptyOrError(ServiceContext* ser
// Blocking work to recover and/or persist the current vector time
ExecutorFuture<void>(Grid::get(service)->getExecutorPool()->getFixedExecutor())
.then([this, service] {
- auto [generation, mustRecoverDurableTime] = [&] {
+ auto mustRecoverDurableTime = [&] {
stdx::lock_guard lg(_mutex);
- return std::make_pair(_generation, !_durableTime);
+ return !_durableTime;
}();
ThreadClient tc("VectorClockStateOperation", service);
@@ -336,50 +326,23 @@ Future<void> VectorClockMongoD::_doWhileQueueNotEmptyOrError(ServiceContext* ser
return true;
});
- return std::make_pair(
- generation,
- VectorTime({LogicalTime(Timestamp(0)),
- LogicalTime(durableVectorClock.getConfigTime()),
- LogicalTime(durableVectorClock.getTopologyTime())}));
- } else {
- auto vectorTime = getTime();
-
- auto* const replCoord = repl::ReplicationCoordinator::get(opCtx);
- if (replCoord->getMemberState().primary()) {
- // Persist as primary
- const VectorClockDocument vcd(vectorTime.configTime().asTimestamp(),
- vectorTime.topologyTime().asTimestamp());
-
- PersistentTaskStore<VectorClockDocument> store(
- NamespaceString::kVectorClockNamespace);
- store.upsert(opCtx,
- QUERY(VectorClockDocument::k_idFieldName << vcd.get_id()),
- vcd.toBSON(),
- WriteConcerns::kMajorityWriteConcern);
- } else {
- // Persist as secondary, by asking the primary
- auto const shardingState = ShardingState::get(opCtx);
- invariant(shardingState->enabled());
-
- auto selfShard = uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(
- opCtx, shardingState->shardId()));
-
- auto cmdResponse = uassertStatusOK(selfShard->runCommandWithFixedRetryAttempts(
- opCtx,
- ReadPreferenceSetting{ReadPreference::PrimaryOnly},
- NamespaceString::kVectorClockNamespace.db().toString(),
- BSON("_vectorClockPersist" << 1),
- Seconds{30},
- Shard::RetryPolicy::kIdempotent));
-
- uassertStatusOK(cmdResponse.commandStatus);
- }
-
- return std::make_pair(generation, vectorTime);
+ return VectorTime({LogicalTime(Timestamp(0)),
+ LogicalTime(durableVectorClock.getConfigTime()),
+ LogicalTime(durableVectorClock.getTopologyTime())});
}
+
+ auto vectorTime = getTime();
+ const VectorClockDocument vcd(vectorTime.configTime().asTimestamp(),
+ vectorTime.topologyTime().asTimestamp());
+
+ PersistentTaskStore<VectorClockDocument> store(NamespaceString::kVectorClockNamespace);
+ store.upsert(opCtx,
+ QUERY(VectorClockDocument::k_idFieldName << vcd.get_id()),
+ vcd.toBSON(),
+ WriteConcerns::kMajorityWriteConcern);
+ return vectorTime;
})
- .getAsync([this, promise = std::move(p)](
- StatusWith<std::pair<uint64_t, VectorTime>> swResult) mutable {
+ .getAsync([this, promise = std::move(p)](StatusWith<VectorTime> swResult) mutable {
promise.setFrom(std::move(swResult));
});