diff options
15 files changed, 348 insertions, 4 deletions
diff --git a/jstests/core/views/views_all_commands.js b/jstests/core/views/views_all_commands.js index bfbb0eba9c6..442e4d9b8c4 100644 --- a/jstests/core/views/views_all_commands.js +++ b/jstests/core/views/views_all_commands.js @@ -728,6 +728,7 @@ let viewsCommandTests = { validateDBMetadata: {command: {validateDBMetadata: 1, apiParameters: {version: "1", strict: true}}}, waitForOngoingChunkSplits: {skip: isUnrelated}, + voteAbortIndexBuild: {skip: isUnrelated}, voteCommitImportCollection: {skip: isUnrelated}, voteCommitIndexBuild: {skip: isUnrelated}, voteCommitTransaction: {skip: isUnrelated}, diff --git a/jstests/noPassthrough/vote_abort_index_build.js b/jstests/noPassthrough/vote_abort_index_build.js new file mode 100644 index 00000000000..ffbf100b9da --- /dev/null +++ b/jstests/noPassthrough/vote_abort_index_build.js @@ -0,0 +1,138 @@ +/** + * Tests the 'voteAbortIndexBuild' internal command. + * + * @tags: [ + * featureFlagIndexBuildGracefulErrorHandling, + * requires_replication, + * ] + */ +(function() { +"use strict"; + +load('jstests/noPassthrough/libs/index_build.js'); + +const rst = new ReplSetTest({ + nodes: [ + {}, + { + // Disallow elections on secondary. + rsConfig: { + priority: 0, + votes: 0, + }, + }, + {/* arbiter */ rsConfig: {arbiterOnly: true}} + ] +}); +const nodes = rst.startSet(); +rst.initiate(); + +const primary = rst.getPrimary(); + +const testDB = primary.getDB('test'); +const coll = testDB.getCollection('test'); + +// Insert one document to avoid empty collection optimization. +assert.commandWorked(coll.insert({a: 1})); + +// Pause the index build, using the 'hangAfterStartingIndexBuild' failpoint. +IndexBuildTest.pauseIndexBuilds(primary); + +const createIdx = IndexBuildTest.startIndexBuild(primary, coll.getFullName(), {'a': 1}); + +// Wait for the index build to start on the primary. +const opId = IndexBuildTest.waitForIndexBuildToStart(testDB, coll.getName(), 'a_1'); +IndexBuildTest.assertIndexBuildCurrentOpContents(testDB, opId); + +// The above opId check does not guarantee the index has actually registered, so 'listIndexes' may +// not list the index build in the below 'assertIndexes'. Make sure the build is registered to avoid +// sporadic failures. +assert.commandWorked(primary.adminCommand({ + waitForFailPoint: "hangAfterStartingIndexBuild", + timesEntered: 1, + maxTimeMS: kDefaultWaitForFailPointTimeout +})); + +// Extract the index build UUID. +const buildUUID = + IndexBuildTest.assertIndexes(coll, 2, ['_id_'], ['a_1'], {includeBuildUUIDs: true})['a_1'] + .buildUUID; + +const abortReason = jsTestName(); + +// Running 'voteAbortIndexBuild' as an arbiter node should fail. +assert.commandFailedWithCode(primary.adminCommand({ + voteAbortIndexBuild: buildUUID, + hostAndPort: rst.getArbiter().host, + reason: abortReason, + writeConcern: {w: "majority"} +}), + 7329201); + +IndexBuildTest.assertIndexes(coll, 2, ['_id_'], ['a_1']); + +// Running 'voteAbortIndexBuild' as a non-member node should fail. Use Doom's reserved port. +const invalidHostAndPort = "localhost:666"; +assert.commandFailedWithCode(primary.adminCommand({ + voteAbortIndexBuild: buildUUID, + hostAndPort: invalidHostAndPort, + reason: abortReason, + writeConcern: {w: "majority"} +}), + 7329201); + +IndexBuildTest.assertIndexes(coll, 2, ['_id_'], ['a_1']); + +// Running 'voteAbortIndexBuild' as data-bearing secondary, should fail due to missing reason. +assert.commandFailedWithCode(primary.adminCommand({ + voteAbortIndexBuild: buildUUID, + hostAndPort: rst.getSecondary().host, + writeConcern: {w: "majority"} +}), + 40414); + +// Running 'voteAbortIndexBuild' as data-bearing secondary, should succeed. +assert.commandWorked(primary.adminCommand({ + voteAbortIndexBuild: buildUUID, + hostAndPort: rst.getSecondary().host, + reason: abortReason, + writeConcern: {w: "majority"} +})); + +// Wait for the index build to stop. +const exitCode = createIdx({checkExitSuccess: false}); +assert.neq(0, exitCode, 'expected shell to exit abnormally due to index build abort'); + +// Verify none of the nodes list the aborted index, and all of them have replicated the +// 'abortIndexBuild' oplog entry. +for (const node of rst.nodes) { + // Skip arbiter. + if (node.getDB('admin')._helloOrLegacyHello().arbiterOnly) { + continue; + } + + const nodeDB = node.getDB(testDB.getName()); + const nodeColl = nodeDB.getCollection(coll.getName()); + + IndexBuildTest.assertIndexes(nodeColl, 1, ['_id_']); + + const cmdNs = nodeDB.getCollection('$cmd').getFullName(); + const ops = rst.dumpOplog(node, {op: 'c', ns: cmdNs, 'o.abortIndexBuild': coll.getName()}); + assert.eq(1, ops.length, 'abortIndexBuild oplog entry not found: ' + tojson(ops)); + + // Verify the abort reason is replicated in the oplog. + const replicatedErrorMsg = ops[0].o.cause.errmsg; + assert.neq(-1, replicatedErrorMsg.indexOf(abortReason)); +} + +// Re-issuing 'voteAbortIndexBuild' command on already aborted build should fail. +assert.commandFailedWithCode(primary.adminCommand({ + voteAbortIndexBuild: buildUUID, + hostAndPort: rst.getSecondary().host, + reason: abortReason, + writeConcern: {w: "majority"} +}), + 7329202); + +rst.stopSet(); +})(); diff --git a/jstests/replsets/all_commands_downgrading_to_upgraded.js b/jstests/replsets/all_commands_downgrading_to_upgraded.js index 2c058b7bb24..79008dbc764 100644 --- a/jstests/replsets/all_commands_downgrading_to_upgraded.js +++ b/jstests/replsets/all_commands_downgrading_to_upgraded.js @@ -1209,6 +1209,7 @@ const allCommands = { validateDBMetadata: { command: {validateDBMetadata: 1, apiParameters: {version: "1", strict: true}}, }, + voteAbortIndexBuild: {skip: isAnInternalCommand}, voteCommitImportCollection: {skip: isAnInternalCommand}, voteCommitIndexBuild: {skip: isAnInternalCommand}, waitForFailPoint: { diff --git a/jstests/replsets/db_reads_while_recovering_all_commands.js b/jstests/replsets/db_reads_while_recovering_all_commands.js index 9beef7db0af..f14bbcd6b09 100644 --- a/jstests/replsets/db_reads_while_recovering_all_commands.js +++ b/jstests/replsets/db_reads_while_recovering_all_commands.js @@ -416,6 +416,7 @@ const allCommands = { expectFailure: true, expectedErrorCode: ErrorCodes.NotPrimaryOrSecondary, }, + voteAbortIndexBuild: {skip: isNotAUserDataRead}, voteCommitImportCollection: {skip: isNotAUserDataRead}, voteCommitIndexBuild: {skip: isNotAUserDataRead}, waitForFailPoint: {skip: isNotAUserDataRead}, diff --git a/jstests/replsets/tenant_migration_concurrent_writes_on_donor_util.js b/jstests/replsets/tenant_migration_concurrent_writes_on_donor_util.js index 103869f3042..1a8837c09d5 100644 --- a/jstests/replsets/tenant_migration_concurrent_writes_on_donor_util.js +++ b/jstests/replsets/tenant_migration_concurrent_writes_on_donor_util.js @@ -710,6 +710,7 @@ export const TenantMigrationConcurrentWriteUtil = { updateUser: {skip: isNotRunOnUserDatabase}, usersInfo: {skip: isNotRunOnUserDatabase}, validate: {skip: isNotWriteCommand}, + voteAbortIndexBuild: {skip: isNotRunOnUserDatabase}, voteCommitIndexBuild: {skip: isNotRunOnUserDatabase}, waitForFailPoint: {skip: isNotRunOnUserDatabase}, waitForOngoingChunkSplits: {skip: isNotRunOnUserDatabase}, diff --git a/jstests/sharding/read_write_concern_defaults_application.js b/jstests/sharding/read_write_concern_defaults_application.js index 75272f2e978..508727be778 100644 --- a/jstests/sharding/read_write_concern_defaults_application.js +++ b/jstests/sharding/read_write_concern_defaults_application.js @@ -770,6 +770,7 @@ let testCases = { usersInfo: {skip: "does not accept read or write concern"}, validate: {skip: "does not accept read or write concern"}, validateDBMetadata: {skip: "does not accept read or write concern"}, + voteAbortIndexBuild: {skip: "internal command"}, voteCommitImportCollection: {skip: "internal command"}, voteCommitIndexBuild: {skip: "internal command"}, waitForFailPoint: {skip: "does not accept read or write concern"}, diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript index 880a40eb479..9a1c5b83865 100644 --- a/src/mongo/db/commands/SConscript +++ b/src/mongo/db/commands/SConscript @@ -560,9 +560,10 @@ env.Library( "top_command.cpp", "txn_cmds.cpp", "user_management_commands.cpp", + "vote_abort_index_build_command.cpp", "vote_commit_index_build_command.cpp", 'internal_rename_if_options_and_indexes_match.idl', - 'vote_commit_index_build.idl', + 'vote_index_build.idl', ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/client/clientdriver_minimal', diff --git a/src/mongo/db/commands/vote_abort_index_build_command.cpp b/src/mongo/db/commands/vote_abort_index_build_command.cpp new file mode 100644 index 00000000000..e5577f60da8 --- /dev/null +++ b/src/mongo/db/commands/vote_abort_index_build_command.cpp @@ -0,0 +1,128 @@ +/** + * Copyright (C) 2023-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + + +#include "mongo/db/repl/replication_coordinator_fwd.h" +#include "mongo/platform/basic.h" + +#include "mongo/db/auth/authorization_session.h" +#include "mongo/db/commands.h" +#include "mongo/db/commands/vote_index_build_gen.h" +#include "mongo/db/index_builds_coordinator.h" +#include "mongo/db/repl/repl_client_info.h" +#include "mongo/db/storage/storage_parameters_gen.h" +#include "mongo/logv2/log.h" + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kCommand + + +namespace mongo { +namespace { + +/** + * Requests abortion of the index build identified by the provided index build UUID. + * + * { + * voteAbortIndexBuild: <index_build_uuid>, + * hostAndPort: "host:port", + * } + */ +class VoteAbortIndexBuildCommand final : public TypedCommand<VoteAbortIndexBuildCommand> { +public: + using Request = VoteAbortIndexBuild; + + std::string help() const override { + return "Internal intra replica set command to request that the primary abort an index " + "build with the specified UUID."; + } + + bool adminOnly() const override { + return true; + } + + AllowedOnSecondary secondaryAllowed(ServiceContext*) const override { + return AllowedOnSecondary::kNever; + } + + class Invocation final : public InvocationBase { + public: + using InvocationBase::InvocationBase; + + void typedRun(OperationContext* opCtx) { + auto lastOpBeforeRun = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp(); + + const auto& cmd = request(); + LOGV2_DEBUG(7329200, + 1, + "Received voteAbortIndexBuild request for index build: {buildUUID}, " + "from host: {host}", + "Received voteAbortIndexBuild request", + "buildUUID"_attr = cmd.getCommandParameter(), + "host"_attr = cmd.getHostAndPort().toString(), + "reason"_attr = cmd.getReason()); + + uassertStatusOK(IndexBuildsCoordinator::get(opCtx)->voteAbortIndexBuild( + opCtx, cmd.getCommandParameter(), cmd.getHostAndPort(), cmd.getReason())); + + auto lastOpAfterRun = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp(); + // If the client's lastOp before and after abort are equal, this means the index build + // was already aborted, and no "abortIndexBuild" oplog entry is generated under this + // client. We must still ensure a correct writeConcern wait on the pre-existing + // "abortIndexBuild" oplog entry before replying. We guarantee this by waiting for the + // system's last op time. + if (lastOpBeforeRun == lastOpAfterRun) { + repl::ReplClientInfo::forClient(opCtx->getClient()) + .setLastOpToSystemLastOpTime(opCtx); + } + } + + private: + NamespaceString ns() const override { + return NamespaceString(request().getDbName(), ""); + } + + bool supportsWriteConcern() const override { + return true; + } + + void doCheckAuthorization(OperationContext* opCtx) const override { + uassert(ErrorCodes::Unauthorized, + "Unauthorized", + AuthorizationSession::get(opCtx->getClient()) + ->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(), + ActionType::internal)); + } + }; +}; + +MONGO_REGISTER_FEATURE_FLAGGED_COMMAND(VoteAbortIndexBuildCommand, + feature_flags::gIndexBuildGracefulErrorHandling); + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/commands/vote_commit_index_build_command.cpp b/src/mongo/db/commands/vote_commit_index_build_command.cpp index cf28b263838..475d23dddf1 100644 --- a/src/mongo/db/commands/vote_commit_index_build_command.cpp +++ b/src/mongo/db/commands/vote_commit_index_build_command.cpp @@ -32,7 +32,7 @@ #include "mongo/db/auth/authorization_session.h" #include "mongo/db/commands.h" -#include "mongo/db/commands/vote_commit_index_build_gen.h" +#include "mongo/db/commands/vote_index_build_gen.h" #include "mongo/db/index_builds_coordinator.h" #include "mongo/db/repl/repl_client_info.h" #include "mongo/logv2/log.h" @@ -57,7 +57,8 @@ public: using Request = VoteCommitIndexBuild; std::string help() const override { - return "Internal intra replica set command"; + return "Internal intra replica set command to signal to the primary that a member is ready " + "to commit an index build."; } bool adminOnly() const override { diff --git a/src/mongo/db/commands/vote_commit_index_build.idl b/src/mongo/db/commands/vote_index_build.idl index 717a7be96e8..7cdf5818a8a 100644 --- a/src/mongo/db/commands/vote_commit_index_build.idl +++ b/src/mongo/db/commands/vote_index_build.idl @@ -39,7 +39,9 @@ commands: voteCommitIndexBuild: command_name: voteCommitIndexBuild cpp_name: VoteCommitIndexBuild - description: "An internal mongod command pertaining to cross replica set index builds" + description: "An internal mongod command pertaining to cross replica set index builds. + Can only be run on a primary node to signal that the node identified with 'hostAndPort' + is ready to commit the index build identified by the UUID in the command." strict: false namespace: type api_version: "" @@ -48,3 +50,21 @@ commands: hostAndPort: type: HostAndPort description: "Replica set member identification" + + voteAbortIndexBuild: + command_name: voteAbortIndexBuild + cpp_name: VoteAbortIndexBuild + description: "An internal mongod command pertaining to cross replica set index builds. + Can only be run on primary node to request that an index build, indentified by UUID, be + aborted." + strict: false + namespace: type + api_version: "" + type: uuid + fields: + hostAndPort: + type: HostAndPort + description: "Replica set member identification" + reason: + type: string + description: "Index build abort reason on the issuing replica set member" diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h index b0775e628de..73fa65b3d22 100644 --- a/src/mongo/db/index_builds_coordinator.h +++ b/src/mongo/db/index_builds_coordinator.h @@ -346,6 +346,14 @@ public: IndexBuilds stopIndexBuildsForRollback(OperationContext* opCtx); /** + * Handles the 'voteAbortIndexBuild' command request. + */ + virtual Status voteAbortIndexBuild(OperationContext* opCtx, + const UUID& buildUUID, + const HostAndPort& hostAndPort, + const StringData& reason) = 0; + + /** * Handles the 'VoteCommitIndexBuild' command request. * Writes the host and port information of the replica set member that has voted to commit an * index build into config.system.indexBuilds collection. diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp index 70d0965d490..b7b3a0cb173 100644 --- a/src/mongo/db/index_builds_coordinator_mongod.cpp +++ b/src/mongo/db/index_builds_coordinator_mongod.cpp @@ -445,6 +445,33 @@ IndexBuildsCoordinatorMongod::_startIndexBuild(OperationContext* opCtx, return replState->sharedPromise.getFuture(); } +Status IndexBuildsCoordinatorMongod::voteAbortIndexBuild(OperationContext* opCtx, + const UUID& buildUUID, + const HostAndPort& votingNode, + const StringData& reason) { + + auto memberConfig = + repl::ReplicationCoordinator::get(opCtx)->findConfigMemberByHostAndPort(votingNode); + if (!memberConfig || memberConfig->isArbiter()) { + return Status{ErrorCodes::Error{7329201}, + "Non-member and arbiter nodes cannot vote to abort an index build"}; + } + + bool aborted = abortIndexBuildByBuildUUID( + opCtx, + buildUUID, + IndexBuildAction::kPrimaryAbort, + fmt::format("'voteAbortIndexBuild' received from '{}': {}", votingNode.toString(), reason)); + + if (aborted) { + return Status::OK(); + } + + // Index build does not exist or cannot be aborted because it is committing. + // No need to wait for write concern. + return Status{ErrorCodes::Error{7329202}, "Index build cannot be aborted"}; +} + Status IndexBuildsCoordinatorMongod::voteCommitIndexBuild(OperationContext* opCtx, const UUID& buildUUID, const HostAndPort& votingNode) { diff --git a/src/mongo/db/index_builds_coordinator_mongod.h b/src/mongo/db/index_builds_coordinator_mongod.h index 9049d0bdfec..72e6f8e26fd 100644 --- a/src/mongo/db/index_builds_coordinator_mongod.h +++ b/src/mongo/db/index_builds_coordinator_mongod.h @@ -91,6 +91,11 @@ public: const UUID& buildUUID, const ResumeIndexInfo& resumeInfo) override; + Status voteAbortIndexBuild(OperationContext* opCtx, + const UUID& buildUUID, + const HostAndPort& hostAndPort, + const StringData& reason) override; + Status voteCommitIndexBuild(OperationContext* opCtx, const UUID& buildUUID, const HostAndPort& hostAndPort) override; diff --git a/src/mongo/embedded/index_builds_coordinator_embedded.cpp b/src/mongo/embedded/index_builds_coordinator_embedded.cpp index 471e6c1c369..e48d507e060 100644 --- a/src/mongo/embedded/index_builds_coordinator_embedded.cpp +++ b/src/mongo/embedded/index_builds_coordinator_embedded.cpp @@ -95,6 +95,13 @@ void IndexBuildsCoordinatorEmbedded::_waitForNextIndexBuildActionAndCommit( std::shared_ptr<ReplIndexBuildState> replState, const IndexBuildOptions& indexBuildOptions) {} +Status IndexBuildsCoordinatorEmbedded::voteAbortIndexBuild(OperationContext* opCtx, + const UUID& buildUUID, + const HostAndPort& hostAndPort, + const StringData& reason) { + MONGO_UNREACHABLE; +} + Status IndexBuildsCoordinatorEmbedded::voteCommitIndexBuild(OperationContext* opCtx, const UUID& buildUUID, const HostAndPort& hostAndPort) { diff --git a/src/mongo/embedded/index_builds_coordinator_embedded.h b/src/mongo/embedded/index_builds_coordinator_embedded.h index 34bbc0394f6..606dbb4c1c3 100644 --- a/src/mongo/embedded/index_builds_coordinator_embedded.h +++ b/src/mongo/embedded/index_builds_coordinator_embedded.h @@ -76,6 +76,10 @@ public: /** * None of the following functions should ever be called on an embedded server node. */ + Status voteAbortIndexBuild(OperationContext* opCtx, + const UUID& buildUUID, + const HostAndPort& hostAndPort, + const StringData& reason) override; Status voteCommitIndexBuild(OperationContext* opCtx, const UUID& buildUUID, const HostAndPort& hostAndPort) override; |