diff options
author | Pierlauro Sciarelli <pierlauro.sciarelli@mongodb.com> | 2020-11-24 17:37:54 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-11-24 18:56:26 +0000 |
commit | 045dd7a5857aa1b34d6fd1d2f6e3e29900010749 (patch) | |
tree | 656e709aa435098c6276725eefbe7554cb1007e5 | |
parent | 40c3bac93c2091cea665de686cae93ef90429775 (diff) | |
download | mongo-045dd7a5857aa1b34d6fd1d2f6e3e29900010749.tar.gz |
SERVER-51903 Avoid VectorClockPersistCommand deadlock upon fast replica state transitions
-rw-r--r-- | src/mongo/db/s/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/s/vector_clock_persist_command.cpp | 90 | ||||
-rw-r--r-- | src/mongo/db/vector_clock_mongod.cpp | 83 |
3 files changed, 23 insertions, 151 deletions
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript index 76a66095f9c..373be39e030 100644 --- a/src/mongo/db/s/SConscript +++ b/src/mongo/db/s/SConscript @@ -341,7 +341,6 @@ env.Library( 'split_vector_command.cpp', 'txn_two_phase_commit_cmds.cpp', 'unset_sharding_command.cpp', - 'vector_clock_persist_command.cpp', 'wait_for_ongoing_chunk_splits_command.cpp', ], LIBDEPS_PRIVATE=[ diff --git a/src/mongo/db/s/vector_clock_persist_command.cpp b/src/mongo/db/s/vector_clock_persist_command.cpp deleted file mode 100644 index 9df463430c4..00000000000 --- a/src/mongo/db/s/vector_clock_persist_command.cpp +++ /dev/null @@ -1,90 +0,0 @@ -/** - * Copyright (C) 2020-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * <http://www.mongodb.com/licensing/server-side-public-license>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kCommand - -#include "mongo/platform/basic.h" - -#include "mongo/db/auth/action_type.h" -#include "mongo/db/auth/authorization_session.h" -#include "mongo/db/commands.h" -#include "mongo/db/s/sharding_state.h" -#include "mongo/db/vector_clock_mutable.h" - -namespace mongo { -namespace { - -/** - * Internal sharding command run on shard primaries to persist the vector clock state. - */ -class VectorClockPersistCommand : public BasicCommand { -public: - VectorClockPersistCommand() : BasicCommand("_vectorClockPersist") {} - - Status checkAuthForCommand(Client* client, - const std::string& dbname, - const BSONObj& cmdObj) const override { - uassert(ErrorCodes::Unauthorized, - "Unauthorized", - AuthorizationSession::get(client)->isAuthorizedForActionsOnResource( - ResourcePattern::forDatabaseName( - NamespaceString::kVectorClockNamespace.db().toString()), - ActionType::internal)); - return Status::OK(); - } - - AllowedOnSecondary secondaryAllowed(ServiceContext*) const override { - return AllowedOnSecondary::kNever; - } - - bool supportsWriteConcern(const BSONObj& cmd) const override { - return true; - } - - std::string help() const override { - return "Internal sharding command run on shard primaries to persist the vector clock " - "state."; - } - - bool run(OperationContext* opCtx, - const std::string& dbname_unused, - const BSONObj& cmdObj, - BSONObjBuilder& result) override { - auto const shardingState = ShardingState::get(opCtx); - uassertStatusOK(shardingState->canAcceptShardedCommands()); - - VectorClockMutable::get(opCtx)->waitForDurable().get(opCtx); - - return true; - } - -} vectorClockPersistCmd; - -} // namespace -} // namespace mongo diff --git a/src/mongo/db/vector_clock_mongod.cpp b/src/mongo/db/vector_clock_mongod.cpp index c369a49382a..deb1be5eeb5 100644 --- a/src/mongo/db/vector_clock_mongod.cpp +++ b/src/mongo/db/vector_clock_mongod.cpp @@ -34,7 +34,6 @@ #include "mongo/db/logical_time_validator.h" #include "mongo/db/persistent_task_store.h" #include "mongo/db/repl/replica_set_aware_service.h" -#include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/s/sharding_state.h" #include "mongo/db/vector_clock_document_gen.h" #include "mongo/db/vector_clock_mutable.h" @@ -128,9 +127,6 @@ private: // Protects the shared state below Mutex _mutex = MONGO_MAKE_LATCH("VectorClockMongoD::_mutex"); - // This value is incremented every time the node changes its role between primary and secondary - uint64_t _generation{0}; - // If set to true, means that another operation already scheduled the `_queue` draining loop, if // false it means that this operation must do it bool _loopScheduled{false}; @@ -173,13 +169,11 @@ VectorClockMongoD::~VectorClockMongoD() = default; void VectorClockMongoD::onStepUpBegin(OperationContext* opCtx, long long term) { stdx::lock_guard lg(_mutex); - ++_generation; _durableTime.reset(); } void VectorClockMongoD::onStepDown() { stdx::lock_guard lg(_mutex); - ++_generation; _durableTime.reset(); } @@ -251,15 +245,11 @@ SharedSemiFuture<void> VectorClockMongoD::_enqueueWaiterAndScheduleLoopIfNeeded( } Future<void> VectorClockMongoD::_doWhileQueueNotEmptyOrError(ServiceContext* service) { - auto [p, f] = makePromiseFuture<std::pair<uint64_t, VectorTime>>(); + auto [p, f] = makePromiseFuture<VectorTime>(); auto future = std::move(f) - .then([this](std::pair<uint64_t, VectorTime> newDurableTime) { + .then([this](VectorTime newDurableTime) { stdx::unique_lock ul(_mutex); - uassert(ErrorCodes::InterruptedDueToReplStateChange, - "VectorClock failed to persist due to stepdown", - newDurableTime.first == _generation); - - _durableTime.emplace(newDurableTime.second); + _durableTime.emplace(newDurableTime); ComparableVectorTime time{*_durableTime}; @@ -274,9 +264,9 @@ Future<void> VectorClockMongoD::_doWhileQueueNotEmptyOrError(ServiceContext* ser // Make sure the VectorClock advances at least up to the just recovered // durable time - _advanceTime({newDurableTime.second.clusterTime(), - newDurableTime.second.configTime(), - newDurableTime.second.topologyTime()}); + _advanceTime({newDurableTime.clusterTime(), + newDurableTime.configTime(), + newDurableTime.topologyTime()}); for (auto& p : promises) p->emplaceValue(); @@ -307,9 +297,9 @@ Future<void> VectorClockMongoD::_doWhileQueueNotEmptyOrError(ServiceContext* ser // Blocking work to recover and/or persist the current vector time ExecutorFuture<void>(Grid::get(service)->getExecutorPool()->getFixedExecutor()) .then([this, service] { - auto [generation, mustRecoverDurableTime] = [&] { + auto mustRecoverDurableTime = [&] { stdx::lock_guard lg(_mutex); - return std::make_pair(_generation, !_durableTime); + return !_durableTime; }(); ThreadClient tc("VectorClockStateOperation", service); @@ -336,50 +326,23 @@ Future<void> VectorClockMongoD::_doWhileQueueNotEmptyOrError(ServiceContext* ser return true; }); - return std::make_pair( - generation, - VectorTime({LogicalTime(Timestamp(0)), - LogicalTime(durableVectorClock.getConfigTime()), - LogicalTime(durableVectorClock.getTopologyTime())})); - } else { - auto vectorTime = getTime(); - - auto* const replCoord = repl::ReplicationCoordinator::get(opCtx); - if (replCoord->getMemberState().primary()) { - // Persist as primary - const VectorClockDocument vcd(vectorTime.configTime().asTimestamp(), - vectorTime.topologyTime().asTimestamp()); - - PersistentTaskStore<VectorClockDocument> store( - NamespaceString::kVectorClockNamespace); - store.upsert(opCtx, - QUERY(VectorClockDocument::k_idFieldName << vcd.get_id()), - vcd.toBSON(), - WriteConcerns::kMajorityWriteConcern); - } else { - // Persist as secondary, by asking the primary - auto const shardingState = ShardingState::get(opCtx); - invariant(shardingState->enabled()); - - auto selfShard = uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard( - opCtx, shardingState->shardId())); - - auto cmdResponse = uassertStatusOK(selfShard->runCommandWithFixedRetryAttempts( - opCtx, - ReadPreferenceSetting{ReadPreference::PrimaryOnly}, - NamespaceString::kVectorClockNamespace.db().toString(), - BSON("_vectorClockPersist" << 1), - Seconds{30}, - Shard::RetryPolicy::kIdempotent)); - - uassertStatusOK(cmdResponse.commandStatus); - } - - return std::make_pair(generation, vectorTime); + return VectorTime({LogicalTime(Timestamp(0)), + LogicalTime(durableVectorClock.getConfigTime()), + LogicalTime(durableVectorClock.getTopologyTime())}); } + + auto vectorTime = getTime(); + const VectorClockDocument vcd(vectorTime.configTime().asTimestamp(), + vectorTime.topologyTime().asTimestamp()); + + PersistentTaskStore<VectorClockDocument> store(NamespaceString::kVectorClockNamespace); + store.upsert(opCtx, + QUERY(VectorClockDocument::k_idFieldName << vcd.get_id()), + vcd.toBSON(), + WriteConcerns::kMajorityWriteConcern); + return vectorTime; }) - .getAsync([this, promise = std::move(p)]( - StatusWith<std::pair<uint64_t, VectorTime>> swResult) mutable { + .getAsync([this, promise = std::move(p)](StatusWith<VectorTime> swResult) mutable { promise.setFrom(std::move(swResult)); }); |