summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAntonio Fuschetto <antonio.fuschetto@mongodb.com>2021-04-07 18:21:57 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-04-23 08:54:09 +0000
commitb95494949b10daac02ae9c0da2d492d2e3294f6e (patch)
treec1bc1faf82268b390a1caa9f486162b9c2c9211f /src
parentf3fea9e4057d77316d258e0ed39aea8b8333ded5 (diff)
downloadmongo-b95494949b10daac02ae9c0da2d492d2e3294f6e.tar.gz
SERVER-55635 Make the setFCV document updates idempotent in a sharded cluster
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/commands/feature_compatibility_version.cpp93
-rw-r--r--src/mongo/db/commands/feature_compatibility_version.h14
-rw-r--r--src/mongo/db/commands/feature_compatibility_version_document.idl6
-rw-r--r--src/mongo/db/commands/set_feature_compatibility_version.idl7
-rw-r--r--src/mongo/db/commands/set_feature_compatibility_version_command.cpp59
5 files changed, 148 insertions, 31 deletions
diff --git a/src/mongo/db/commands/feature_compatibility_version.cpp b/src/mongo/db/commands/feature_compatibility_version.cpp
index 93fa3ac6de6..6ce5e94a728 100644
--- a/src/mongo/db/commands/feature_compatibility_version.cpp
+++ b/src/mongo/db/commands/feature_compatibility_version.cpp
@@ -200,20 +200,6 @@ bool isWriteableStorageEngine() {
return !storageGlobalParams.readOnly && (storageGlobalParams.engine != "devnull");
}
-// Returns the featureCompatibilityVersion document if it exists.
-boost::optional<BSONObj> findFcvDocument(OperationContext* opCtx) {
- // Ensure database is opened and exists.
- AutoGetOrCreateDb autoDb(opCtx, NamespaceString::kServerConfigurationNamespace.db(), MODE_IX);
-
- const auto query = BSON("_id" << FeatureCompatibilityVersionParser::kParameterName);
- const auto swFcv = repl::StorageInterface::get(opCtx)->findById(
- opCtx, NamespaceString::kServerConfigurationNamespace, query["_id"]);
- if (!swFcv.isOK()) {
- return boost::none;
- }
- return swFcv.getValue();
-}
-
/**
* Build update command for featureCompatibilityVersion document updates.
*/
@@ -253,13 +239,74 @@ void runUpdateCommand(OperationContext* opCtx, const FeatureCompatibilityVersion
} // namespace
+boost::optional<BSONObj> FeatureCompatibilityVersion::findFeatureCompatibilityVersionDocument(
+ OperationContext* opCtx) {
+ AutoGetOrCreateDb autoDb(opCtx, NamespaceString::kServerConfigurationNamespace.db(), MODE_IX);
+
+ const auto query = BSON("_id" << FeatureCompatibilityVersionParser::kParameterName);
+ const auto swFcv = repl::StorageInterface::get(opCtx)->findById(
+ opCtx, NamespaceString::kServerConfigurationNamespace, query["_id"]);
+ if (!swFcv.isOK()) {
+ return boost::none;
+ }
+ return swFcv.getValue();
+}
+
void FeatureCompatibilityVersion::validateSetFeatureCompatibilityVersionRequest(
- FCV fromVersion, FCV newVersion, bool isFromConfigServer) {
+ OperationContext* opCtx, const SetFeatureCompatibilityVersion& setFCVRequest, FCV fromVersion) {
+
+ auto newVersion = setFCVRequest.getCommandParameter();
+ auto isFromConfigServer = setFCVRequest.getFromConfigServer().value_or(false);
+
uassert(
5147403,
"cannot set featureCompatibilityVersion to '{}' while featureCompatibilityVersion is '{}'"_format(
FCVP::toString(newVersion), FCVP::toString(fromVersion)),
fcvTransitions.permitsTransition(fromVersion, newVersion, isFromConfigServer));
+
+ auto setFCVPhase = setFCVRequest.getPhase();
+ if (!isFromConfigServer || !setFCVPhase) {
+ return;
+ }
+
+ auto changeTimestamp = setFCVRequest.getChangeTimestamp();
+ invariant(changeTimestamp);
+
+ auto fcvObj = findFeatureCompatibilityVersionDocument(opCtx);
+ auto fcvDoc = FeatureCompatibilityVersionDocument::parse(
+ IDLParserErrorContext("featureCompatibilityVersionDocument"), fcvObj.get());
+ auto previousTimestamp = fcvDoc.getChangeTimestamp();
+
+ if (setFCVPhase == SetFCVPhaseEnum::kStart) {
+ uassert(
+ 5563501,
+ "Shard received a timestamp for phase 1 of the 'setFeatureCompatibilityVersion' "
+ "command which is too old, so the request is discarded. This may indicate that it is a "
+ "request related to a previous invocation of the 'setFeatureCompatibilityVersion' "
+ "command which, for example, was temporarily stuck on a router.",
+ previousTimestamp && previousTimestamp > changeTimestamp);
+ } else {
+ uassert(5563601,
+ "Cannot transition to fully upgraded or fully downgraded state if the shard is not "
+ "in kUpgrading or kDowngrading state",
+ serverGlobalParams.featureCompatibility.isUpgradingOrDowngrading());
+
+ tassert(5563502,
+ "Shard received a timestamp for phase-2 of the 'setFeatureCompatibilityVersion' "
+ "command that does not match the one received for phase-1, so the request is "
+ "discarded. This may indicate that it is a request related to a previous "
+ "invocation of the 'setFeatureCompatibilityVersion' command which, for example, "
+ "was temporarily stuck on a router.",
+ !previousTimestamp || previousTimestamp < changeTimestamp);
+
+ uassert(
+ 5563503,
+ "Shard received a timestamp for phase-2 of the 'setFeatureCompatibilityVersion' "
+ "command which is too old, so the request is discarded. This may indicate that it is a "
+ "request related to a previous invocation of the 'setFeatureCompatibilityVersion' "
+ "command which, for example, was temporarily stuck on a router.",
+ previousTimestamp > changeTimestamp);
+ }
}
void FeatureCompatibilityVersion::updateFeatureCompatibilityVersionDocument(
@@ -267,6 +314,7 @@ void FeatureCompatibilityVersion::updateFeatureCompatibilityVersionDocument(
ServerGlobalParams::FeatureCompatibility::Version fromVersion,
ServerGlobalParams::FeatureCompatibility::Version newVersion,
bool isFromConfigServer,
+ boost::optional<Timestamp> changeTimestamp,
bool setTargetVersion) {
// Only transition to fully upgraded or downgraded states when we
@@ -277,6 +325,17 @@ void FeatureCompatibilityVersion::updateFeatureCompatibilityVersionDocument(
: fcvTransitions.getTransitionalVersion(fromVersion, newVersion, isFromConfigServer);
FeatureCompatibilityVersionDocument fcvDoc =
fcvTransitions.getFCVDocument(transitioningVersion);
+
+ // The timestamp must be removed when downgrading to version 4.9 or 4.4. This is necessary to
+ // avoid the presence of an unknown field that old binaries are unable to deserialize.
+ if (transitioningVersion == ServerGlobalParams::FeatureCompatibility::Version::kVersion49 ||
+ transitioningVersion ==
+ ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo44) {
+ fcvDoc.setChangeTimestamp(boost::none);
+ } else {
+ fcvDoc.setChangeTimestamp(changeTimestamp);
+ }
+
runUpdateCommand(opCtx, fcvDoc);
}
@@ -360,7 +419,7 @@ void FeatureCompatibilityVersion::updateMinWireVersion() {
void FeatureCompatibilityVersion::initializeForStartup(OperationContext* opCtx) {
// Global write lock must be held.
invariant(opCtx->lockState()->isW());
- auto featureCompatibilityVersion = findFcvDocument(opCtx);
+ auto featureCompatibilityVersion = findFeatureCompatibilityVersionDocument(opCtx);
if (!featureCompatibilityVersion) {
return;
}
@@ -414,7 +473,7 @@ void FeatureCompatibilityVersion::fassertInitializedAfterStartup(OperationContex
return;
}
- auto fcvDocument = findFcvDocument(opCtx);
+ auto fcvDocument = findFeatureCompatibilityVersionDocument(opCtx);
auto const storageEngine = opCtx->getServiceContext()->getStorageEngine();
auto dbNames = storageEngine->listDatabases();
diff --git a/src/mongo/db/commands/feature_compatibility_version.h b/src/mongo/db/commands/feature_compatibility_version.h
index 41563a7f42c..dc7a2ca8ef3 100644
--- a/src/mongo/db/commands/feature_compatibility_version.h
+++ b/src/mongo/db/commands/feature_compatibility_version.h
@@ -33,6 +33,7 @@
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/db/commands/feature_compatibility_version_document_gen.h"
+#include "mongo/db/commands/set_feature_compatibility_version_gen.h"
#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/server_options.h"
@@ -56,13 +57,19 @@ public:
static void fassertInitializedAfterStartup(OperationContext* opCtx);
/**
+ * Returns the on-disk feature compatibility version document if it exists.
+ */
+ static boost::optional<BSONObj> findFeatureCompatibilityVersionDocument(
+ OperationContext* opCtx);
+
+ /**
* uassert that a transition from fromVersion to newVersion is permitted. Different rules apply
* if the request is from a config server.
*/
static void validateSetFeatureCompatibilityVersionRequest(
- ServerGlobalParams::FeatureCompatibility::Version fromVersion,
- ServerGlobalParams::FeatureCompatibility::Version newVersion,
- bool isFromConfigServer);
+ OperationContext* opCtx,
+ const SetFeatureCompatibilityVersion& setFCVRequest,
+ ServerGlobalParams::FeatureCompatibility::Version fromVersion);
/**
* Updates the on-disk feature compatibility version document for the transition fromVersion ->
@@ -73,6 +80,7 @@ public:
ServerGlobalParams::FeatureCompatibility::Version fromVersion,
ServerGlobalParams::FeatureCompatibility::Version newVersion,
bool isFromConfigServer,
+ boost::optional<Timestamp> timestamp,
bool setTargetVersion);
/**
diff --git a/src/mongo/db/commands/feature_compatibility_version_document.idl b/src/mongo/db/commands/feature_compatibility_version_document.idl
index fc24de54653..90322ed5043 100644
--- a/src/mongo/db/commands/feature_compatibility_version_document.idl
+++ b/src/mongo/db/commands/feature_compatibility_version_document.idl
@@ -58,3 +58,9 @@ structs:
optional: true
validator:
callback: "FeatureCompatibilityVersionParser::validatePreviousVersionField"
+ changeTimestamp:
+ description: "Timestamp used to identify the 2-phase setFCV request. Both phases (kStart and
+ kComplete) must have the same timestamp for the entire sequence, and every new
+ sequence started must strictly have incrementing timestamp."
+ type: timestamp
+ optional: true
diff --git a/src/mongo/db/commands/set_feature_compatibility_version.idl b/src/mongo/db/commands/set_feature_compatibility_version.idl
index d6fb33b6193..87a36367e76 100644
--- a/src/mongo/db/commands/set_feature_compatibility_version.idl
+++ b/src/mongo/db/commands/set_feature_compatibility_version.idl
@@ -75,3 +75,10 @@ commands:
shard, it will run the full setFCV sequence (both phases)."
type: SetFCVPhase
optional: true
+ changeTimestamp:
+ description: "Timestamp used to identify the 2-phase setFCV request. Both phases
+ (kStart and kComplete) must have the same timestamp for the entire
+ sequence, and every new sequence started must strictly have
+ incrementing timestamp."
+ type: timestamp
+ optional: true
diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
index cf77ab48685..4bb4bd9a4d8 100644
--- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
+++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
@@ -56,6 +56,7 @@
#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/db/server_options.h"
#include "mongo/db/transaction_history_iterator.h"
+#include "mongo/db/vector_clock.h"
#include "mongo/db/views/view_catalog.h"
#include "mongo/logv2/log.h"
#include "mongo/rpc/get_status_from_command_result.h"
@@ -294,14 +295,44 @@ public:
return true;
}
- auto isFromConfigServer = request.getFromConfigServer().value_or(false);
+ boost::optional<Timestamp> changeTimestamp;
+ if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
+ // The Config Server creates a new ID (i.e., timestamp) when it receives an upgrade or
+ // downgrade request. Alternatively, the request refers to a previously aborted
+ // operation for which the local FCV document must contain the ID to be reused.
+
+ if (!serverGlobalParams.featureCompatibility.isUpgradingOrDowngrading()) {
+ const auto now = VectorClock::get(opCtx)->getTime();
+ changeTimestamp = now.clusterTime().asTimestamp();
+ } else {
+ auto fcvObj =
+ FeatureCompatibilityVersion::findFeatureCompatibilityVersionDocument(opCtx);
+ auto fcvDoc = FeatureCompatibilityVersionDocument::parse(
+ IDLParserErrorContext("featureCompatibilityVersionDocument"), fcvObj.get());
+ changeTimestamp = fcvDoc.getChangeTimestamp();
+ invariant(changeTimestamp);
+ }
+ } else if (serverGlobalParams.clusterRole == ClusterRole::ShardServer &&
+ request.getPhase()) {
+ // Shards receive the timestamp from the Config Server's request.
+ changeTimestamp = request.getChangeTimestamp();
+ uassert(5563500,
+ "The 'timestamp' field is missing even though the node is running as a shard. "
+ "This may indicate that the 'setFeatureCompatibilityVersion' command was "
+ "invoked directly against the shard or that the config server has not been "
+ "upgraded to at least version 5.0.",
+ changeTimestamp);
+ }
+
FeatureCompatibilityVersion::validateSetFeatureCompatibilityVersionRequest(
- actualVersion, requestedVersion, isFromConfigServer);
+ opCtx, request, actualVersion);
uassert(5563600,
"'phase' field is only valid to be specified on shards",
!request.getPhase() || serverGlobalParams.clusterRole == ClusterRole::ShardServer);
+ auto isFromConfigServer = request.getFromConfigServer().value_or(false);
+
if (!request.getPhase() || request.getPhase() == SetFCVPhaseEnum::kStart) {
{
// Start transition to 'requestedVersion' by updating the local FCV document to a
@@ -316,6 +347,7 @@ public:
actualVersion,
requestedVersion,
isFromConfigServer,
+ changeTimestamp,
true /* setTargetVersion */);
}
@@ -331,19 +363,15 @@ public:
}
}
+ invariant(serverGlobalParams.featureCompatibility.isUpgradingOrDowngrading());
invariant(!request.getPhase() || request.getPhase() == SetFCVPhaseEnum::kComplete);
- uassert(5563601,
- "Cannot transition to fully upgraded or fully downgraded state if we are not in "
- "kUpgrading or kDowngrading state",
- serverGlobalParams.featureCompatibility.isUpgradingOrDowngrading());
-
hangAfterStartingFCVTransition.pauseWhileSet(opCtx);
if (requestedVersion > actualVersion) {
- _runUpgrade(opCtx, request);
+ _runUpgrade(opCtx, request, changeTimestamp);
} else {
- _runDowngrade(opCtx, request);
+ _runDowngrade(opCtx, request, changeTimestamp);
}
{
@@ -356,6 +384,7 @@ public:
serverGlobalParams.featureCompatibility.getVersion(),
requestedVersion,
isFromConfigServer,
+ changeTimestamp,
false /* setTargetVersion */);
}
@@ -363,13 +392,16 @@ public:
}
private:
- void _runUpgrade(OperationContext* opCtx, const SetFeatureCompatibilityVersion& request) {
+ void _runUpgrade(OperationContext* opCtx,
+ const SetFeatureCompatibilityVersion& request,
+ boost::optional<Timestamp> changeTimestamp) {
const auto requestedVersion = request.getCommandParameter();
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
// Tell the shards to enter phase-1 of setFCV
auto requestPhase1 = request;
requestPhase1.setPhase(SetFCVPhaseEnum::kStart);
+ requestPhase1.setChangeTimestamp(changeTimestamp);
uassertStatusOK(
ShardingCatalogManager::get(opCtx)->setFeatureCompatibilityVersionOnShards(
opCtx, CommandHelpers::appendMajorityWriteConcern(requestPhase1.toBSON({}))));
@@ -434,6 +466,7 @@ private:
// Tell the shards to enter phase-2 of setFCV (fully upgraded)
auto requestPhase2 = request;
requestPhase2.setPhase(SetFCVPhaseEnum::kComplete);
+ requestPhase2.setChangeTimestamp(changeTimestamp);
uassertStatusOK(
ShardingCatalogManager::get(opCtx)->setFeatureCompatibilityVersionOnShards(
opCtx, CommandHelpers::appendMajorityWriteConcern(requestPhase2.toBSON({}))));
@@ -483,13 +516,16 @@ private:
}
}
- void _runDowngrade(OperationContext* opCtx, const SetFeatureCompatibilityVersion& request) {
+ void _runDowngrade(OperationContext* opCtx,
+ const SetFeatureCompatibilityVersion& request,
+ boost::optional<Timestamp> changeTimestamp) {
const auto requestedVersion = request.getCommandParameter();
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
// Tell the shards to enter phase-1 of setFCV
auto requestPhase1 = request;
requestPhase1.setPhase(SetFCVPhaseEnum::kStart);
+ requestPhase1.setChangeTimestamp(changeTimestamp);
uassertStatusOK(
ShardingCatalogManager::get(opCtx)->setFeatureCompatibilityVersionOnShards(
opCtx, CommandHelpers::appendMajorityWriteConcern(requestPhase1.toBSON({}))));
@@ -572,6 +608,7 @@ private:
// Tell the shards to enter phase-2 of setFCV (fully downgraded)
auto requestPhase2 = request;
requestPhase2.setPhase(SetFCVPhaseEnum::kComplete);
+ requestPhase2.setChangeTimestamp(changeTimestamp);
uassertStatusOK(
ShardingCatalogManager::get(opCtx)->setFeatureCompatibilityVersionOnShards(
opCtx, CommandHelpers::appendMajorityWriteConcern(requestPhase2.toBSON({}))));