diff options
author | Ian Boros <ian.boros@10gen.com> | 2018-03-15 19:00:49 -0400 |
---|---|---|
committer | Ian Boros <ian.boros@10gen.com> | 2018-03-27 10:44:21 -0400 |
commit | 19a12557f233b43859912622ef1cf77c8ae777a2 (patch) | |
tree | ad21723c2424b6eea2a95831f56a096c787d475c /src/mongo/db/commands | |
parent | 3770df1373a56e3394cbedd150c47397f5268681 (diff) | |
download | mongo-19a12557f233b43859912622ef1cf77c8ae777a2.tar.gz |
SERVER-33462 allow killOp on local mongos operations
Diffstat (limited to 'src/mongo/db/commands')
-rw-r--r-- | src/mongo/db/commands/SConscript | 7 | ||||
-rw-r--r-- | src/mongo/db/commands/find_cmd.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/commands/getmore_cmd.cpp | 85 | ||||
-rw-r--r-- | src/mongo/db/commands/kill_op.cpp | 97 | ||||
-rw-r--r-- | src/mongo/db/commands/kill_op_cmd_base.cpp | 132 | ||||
-rw-r--r-- | src/mongo/db/commands/kill_op_cmd_base.h | 100 |
6 files changed, 274 insertions, 151 deletions
diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript index b07fd756abf..fb60cfb4c04 100644 --- a/src/mongo/db/commands/SConscript +++ b/src/mongo/db/commands/SConscript @@ -215,6 +215,7 @@ env.Library( '$BUILD_DIR/mongo/db/catalog/index_key_validate', '$BUILD_DIR/mongo/db/cloner', '$BUILD_DIR/mongo/db/command_can_run_here', + '$BUILD_DIR/mongo/db/curop_failpoint_helpers', '$BUILD_DIR/mongo/db/exec/stagedebug_cmd', '$BUILD_DIR/mongo/db/index/index_access_method', '$BUILD_DIR/mongo/db/index_d', @@ -232,7 +233,7 @@ env.Library( 'core', 'current_op_common', 'fsync_locked', - 'killcursors_common', + 'kill_common', 'list_collections_filter', 'profile_common', 'test_commands_enabled', @@ -281,6 +282,7 @@ env.Library( '$BUILD_DIR/mongo/db/rw_concern_d', '$BUILD_DIR/mongo/db/s/sharding_catalog_manager', '$BUILD_DIR/mongo/s/sharding_legacy_api', + 'kill_common', 'mongod_fcv', 'mongod_fsync', 'standalone', @@ -290,9 +292,10 @@ env.Library( ) env.Library( - target='killcursors_common', + target='kill_common', source=[ 'killcursors_common.cpp', + 'kill_op_cmd_base.cpp' ], LIBDEPS=[ '$BUILD_DIR/mongo/db/audit', diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp index 90806185039..875638e46c2 100644 --- a/src/mongo/db/commands/find_cmd.cpp +++ b/src/mongo/db/commands/find_cmd.cpp @@ -35,6 +35,7 @@ #include "mongo/db/clientcursor.h" #include "mongo/db/commands.h" #include "mongo/db/commands/run_aggregate.h" +#include "mongo/db/curop_failpoint_helpers.h" #include "mongo/db/db_raii.h" #include "mongo/db/exec/working_set_common.h" #include "mongo/db/matcher/extensions_callback_real.h" @@ -329,6 +330,9 @@ public: return true; } + CurOpFailpointHelpers::waitWhileFailPointEnabled( + &waitInFindBeforeMakingBatch, opCtx, "waitInFindBeforeMakingBatch"); + const QueryRequest& originalQR = exec->getCanonicalQuery()->getQueryRequest(); // Stream query results, adding them to a BSONArray as we go. diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp index 6674092b823..45595a597ef 100644 --- a/src/mongo/db/commands/getmore_cmd.cpp +++ b/src/mongo/db/commands/getmore_cmd.cpp @@ -40,6 +40,7 @@ #include "mongo/db/clientcursor.h" #include "mongo/db/commands.h" #include "mongo/db/curop.h" +#include "mongo/db/curop_failpoint_helpers.h" #include "mongo/db/cursor_manager.h" #include "mongo/db/db_raii.h" #include "mongo/db/exec/working_set_common.h" @@ -67,48 +68,6 @@ namespace { MONGO_FP_DECLARE(rsStopGetMoreCmd); -// Helper function which sets the 'msg' field of the opCtx's CurOp to the specified string, and -// returns the original value of the field. -std::string updateCurOpMsg(OperationContext* opCtx, const std::string& newMsg) { - stdx::lock_guard<Client> lk(*opCtx->getClient()); - auto oldMsg = CurOp::get(opCtx)->getMessage(); - CurOp::get(opCtx)->setMessage_inlock(newMsg.c_str()); - return oldMsg; -} - -// This helper function works much like MONGO_FAIL_POINT_PAUSE_WHILE_SET, but it additionally -// releases and re-acquires the collection readLock at regular intervals, in order to avoid -// deadlocks caused by the pinned-cursor failpoints in this file (see SERVER-21997). Finally, it -// also sets the 'msg' field of the opCtx's CurOp to the given string while the failpoint is active. -void waitWhileFailPointEnabled(FailPoint* failPoint, - OperationContext* opCtx, - const NamespaceString& nss, - const std::string& curOpMsg, - boost::optional<AutoGetCollectionForRead>* readLock) { - invariant(failPoint); - auto origCurOpMsg = updateCurOpMsg(opCtx, curOpMsg); - - MONGO_FAIL_POINT_BLOCK((*failPoint), options) { - const BSONObj& data = options.getData(); - const bool shouldCheckForInterrupt = data["shouldCheckForInterrupt"].booleanSafe(); - while (MONGO_FAIL_POINT((*failPoint))) { - sleepFor(Milliseconds(10)); - if (readLock && *readLock) { - readLock->reset(); - readLock->emplace(opCtx, nss); - } - - // Check for interrupt so that an operation can be killed while waiting for the - // failpoint to be disabled, if the failpoint is configured to be interruptible. - if (shouldCheckForInterrupt) { - opCtx->checkForInterrupt(); - } - } - } - - updateCurOpMsg(opCtx, origCurOpMsg); -} - /** * Validates that the lsid of 'opCtx' matches that of 'cursor'. This must be called after * authenticating, so that it is safe to report the lsid of 'cursor'. @@ -331,15 +290,23 @@ public: ClientCursor* cursor = ccPin.getValue().getCursor(); + // Only used by the failpoints. + const auto dropAndReaquireReadLock = [&readLock, opCtx, &request]() { + readLock.reset(); + readLock.emplace(opCtx, request.nss); + }; + // If the 'waitAfterPinningCursorBeforeGetMoreBatch' fail point is enabled, set the 'msg' - // field of this operation's CurOp to signal that we've hit this point and then spin until - // the failpoint is released. + // field of this operation's CurOp to signal that we've hit this point and then repeatedly + // release and re-acquire the collection readLock at regular intervals until the failpoint + // is released. This is done in order to avoid deadlocks caused by the pinned-cursor + // failpoints in this file (see SERVER-21997). if (MONGO_FAIL_POINT(waitAfterPinningCursorBeforeGetMoreBatch)) { - waitWhileFailPointEnabled(&waitAfterPinningCursorBeforeGetMoreBatch, - opCtx, - request.nss, - "waitAfterPinningCursorBeforeGetMoreBatch", - &readLock); + CurOpFailpointHelpers::waitWhileFailPointEnabled( + &waitAfterPinningCursorBeforeGetMoreBatch, + opCtx, + "waitAfterPinningCursorBeforeGetMoreBatch", + dropAndReaquireReadLock); } // A user can only call getMore on their own cursor. If there were multiple users @@ -468,11 +435,11 @@ public: // operation's CurOp to signal that we've hit this point and then spin until the failpoint // is released. if (MONGO_FAIL_POINT(waitWithPinnedCursorDuringGetMoreBatch)) { - waitWhileFailPointEnabled(&waitWithPinnedCursorDuringGetMoreBatch, - opCtx, - request.nss, - "waitWithPinnedCursorDuringGetMoreBatch", - &readLock); + CurOpFailpointHelpers::waitWhileFailPointEnabled( + &waitWithPinnedCursorDuringGetMoreBatch, + opCtx, + "waitWithPinnedCursorDuringGetMoreBatch", + dropAndReaquireReadLock); } Status batchStatus = generateBatch(opCtx, cursor, request, &nextBatch, &state, &numResults); @@ -525,11 +492,11 @@ public: // failpoint is active, set the 'msg' field of this operation's CurOp to signal that we've // hit this point and then spin until the failpoint is released. if (MONGO_FAIL_POINT(waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch)) { - waitWhileFailPointEnabled(&waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch, - opCtx, - request.nss, - "waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch", - &readLock); + CurOpFailpointHelpers::waitWhileFailPointEnabled( + &waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch, + opCtx, + "waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch", + dropAndReaquireReadLock); } return true; diff --git a/src/mongo/db/commands/kill_op.cpp b/src/mongo/db/commands/kill_op.cpp index e6278064654..604a16262da 100644 --- a/src/mongo/db/commands/kill_op.cpp +++ b/src/mongo/db/commands/kill_op.cpp @@ -39,6 +39,7 @@ #include "mongo/db/auth/authorization_session.h" #include "mongo/db/client.h" #include "mongo/db/commands.h" +#include "mongo/db/commands/kill_op_cmd_base.h" #include "mongo/db/operation_context.h" #include "mongo/db/service_context.h" #include "mongo/util/log.h" @@ -46,104 +47,20 @@ namespace mongo { -class KillOpCommand : public BasicCommand { +class KillOpCommand : public KillOpCmdBase { public: - KillOpCommand() : BasicCommand("killOp") {} - - - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { - return false; - } - - AllowedOnSecondary secondaryAllowed(ServiceContext*) const override { - return AllowedOnSecondary::kAlways; - } - - bool adminOnly() const final { - return true; - } - - static long long parseOpId(const BSONObj& cmdObj) { - long long op; - uassertStatusOK(bsonExtractIntegerField(cmdObj, "op", &op)); - - // Internally opid is an unsigned 32-bit int, but as BSON only has signed integer types, - // we wrap values exceeding 2,147,483,647 to negative numbers. The following undoes this - // transformation, so users can use killOp on the (negative) opid they received. - if (op >= std::numeric_limits<int>::min() && op < 0) - op += 1ull << 32; - - uassert(26823, - str::stream() << "invalid op : " << op, - (op >= 0) && (op <= std::numeric_limits<unsigned int>::max())); - - - return op; - } - - static StatusWith<std::tuple<stdx::unique_lock<Client>, OperationContext*>> _findOp( - Client* client, unsigned int opId) { - AuthorizationSession* authzSession = AuthorizationSession::get(client); - - for (ServiceContext::LockedClientsCursor cursor(client->getServiceContext()); - Client* opClient = cursor.next();) { - stdx::unique_lock<Client> lk(*opClient); - - OperationContext* opCtx = opClient->getOperationContext(); - if (opCtx && opCtx->getOpID() == opId) { - if (authzSession->isAuthorizedForActionsOnResource( - ResourcePattern::forClusterResource(), ActionType::killop) || - authzSession->isCoauthorizedWithClient(opClient)) { - return {std::make_tuple(std::move(lk), opCtx)}; - } - break; - } - } - - return Status(ErrorCodes::NoSuchKey, str::stream() << "Could not access opID: " << opId); - } - - Status checkAuthForCommand(Client* client, - const std::string& dbname, - const BSONObj& cmdObj) const final { - AuthorizationSession* authzSession = AuthorizationSession::get(client); - - if (authzSession->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(), - ActionType::killop)) { - // If we have administrative permission to run killop, we don't need to traverse the - // Client list to figure out if we own the operation which will be terminated. - return Status::OK(); - } - - bool isAuthenticated = - AuthorizationSession::get(client)->getAuthenticatedUserNames().more(); - if (isAuthenticated) { - long long opId = parseOpId(cmdObj); - auto swLkAndOp = _findOp(client, opId); - if (swLkAndOp.isOK()) { - // We were able to find the Operation, and we were authorized to interact with it. - return Status::OK(); - } - } - return Status(ErrorCodes::Unauthorized, "Unauthorized"); - } - bool run(OperationContext* opCtx, const std::string& db, const BSONObj& cmdObj, BSONObjBuilder& result) final { - long long opId = parseOpId(cmdObj); + long long opId = KillOpCmdBase::parseOpId(cmdObj); - log() << "going to kill op: " << opId; + // Used by tests to check if auth checks passed. result.append("info", "attempting to kill op"); - auto swLkAndOp = _findOp(opCtx->getClient(), opId); - if (swLkAndOp.isOK()) { - stdx::unique_lock<Client> lk; - OperationContext* opCtxToKill; - std::tie(lk, opCtxToKill) = std::move(swLkAndOp.getValue()); - opCtx->getServiceContext()->killOperation(opCtxToKill); - } + log() << "going to kill op: " << opId; + KillOpCmdBase::killLocalOperation(opCtx, opId); + // killOp always reports success once past the auth check. return true; } } killOpCmd; diff --git a/src/mongo/db/commands/kill_op_cmd_base.cpp b/src/mongo/db/commands/kill_op_cmd_base.cpp new file mode 100644 index 00000000000..50035b03eea --- /dev/null +++ b/src/mongo/db/commands/kill_op_cmd_base.cpp @@ -0,0 +1,132 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/commands/kill_op_cmd_base.h" + +#include "mongo/bson/util/bson_extract.h" +#include "mongo/db/audit.h" +#include "mongo/db/auth/authorization_session.h" +#include "mongo/db/client.h" +#include "mongo/db/operation_context.h" +#include "mongo/util/mongoutils/str.h" + +namespace mongo { + +Status KillOpCmdBase::checkAuthForCommand(Client* client, + const std::string& dbname, + const BSONObj& cmdObj) const { + AuthorizationSession* authzSession = AuthorizationSession::get(client); + + if (authzSession->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(), + ActionType::killop)) { + // If we have administrative permission to run killop, we don't need to traverse the + // Client list to figure out if we own the operation which will be terminated. + return Status::OK(); + } + + bool isAuthenticated = AuthorizationSession::get(client)->getAuthenticatedUserNames().more(); + if (isAuthenticated && isKillingLocalOp(cmdObj.getField("op"))) { + // Look up the OperationContext and see if we have permission to kill it. This is done once + // here and again in the command body. The check here in the checkAuthForCommand() function + // is necessary because if the check fails, it will be picked up by the auditing system. + long long opId = parseOpId(cmdObj); + auto lkAndOp = KillOpCmdBase::findOpForKilling(client, opId); + if (lkAndOp) { + // We were able to find the Operation, and we were authorized to interact with it. + return Status::OK(); + } + } + return Status(ErrorCodes::Unauthorized, "Unauthorized"); +} + + +bool KillOpCmdBase::isKillingLocalOp(const BSONElement& opElem) { + return opElem.isNumber(); +} + +boost::optional<std::tuple<stdx::unique_lock<Client>, OperationContext*>> +KillOpCmdBase::findOperationContext(ServiceContext* serviceContext, unsigned int opId) { + for (ServiceContext::LockedClientsCursor cursor(serviceContext); + Client* opClient = cursor.next();) { + stdx::unique_lock<Client> lk(*opClient); + + OperationContext* opCtx = opClient->getOperationContext(); + if (opCtx && opCtx->getOpID() == opId) { + return {std::make_tuple(std::move(lk), opCtx)}; + } + } + + return boost::none; +} + +boost::optional<std::tuple<stdx::unique_lock<Client>, OperationContext*>> +KillOpCmdBase::findOpForKilling(Client* client, unsigned int opId) { + AuthorizationSession* authzSession = AuthorizationSession::get(client); + + auto lockAndOpCtx = findOperationContext(client->getServiceContext(), opId); + if (lockAndOpCtx) { + OperationContext* opToKill = std::get<1>(*lockAndOpCtx); + if (authzSession->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(), + ActionType::killop) || + authzSession->isCoauthorizedWithClient(opToKill->getClient())) { + return lockAndOpCtx; + } + } + + return boost::none; +} + +void KillOpCmdBase::killLocalOperation(OperationContext* opCtx, unsigned int opToKill) { + stdx::unique_lock<Client> lk; + OperationContext* opCtxToKill; + auto lockAndOpCtx = findOpForKilling(opCtx->getClient(), opToKill); + if (!lockAndOpCtx) { + // killOp always reports success past the auth check. + return; + } + + std::tie(lk, opCtxToKill) = std::move(*lockAndOpCtx); + + invariant(lk); + opCtx->getServiceContext()->killOperation(opCtxToKill); +} + +unsigned int KillOpCmdBase::parseOpId(const BSONObj& cmdObj) { + long long op; + uassertStatusOK(bsonExtractIntegerField(cmdObj, "op", &op)); + + uassert(26823, + str::stream() << "invalid op : " << op << ". Op ID cannot be represented with 32 bits", + (op >= std::numeric_limits<int>::min()) && (op <= std::numeric_limits<int>::max())); + + return static_cast<unsigned int>(op); +} + +} // namespace mongo diff --git a/src/mongo/db/commands/kill_op_cmd_base.h b/src/mongo/db/commands/kill_op_cmd_base.h new file mode 100644 index 00000000000..ad4d2f8abfd --- /dev/null +++ b/src/mongo/db/commands/kill_op_cmd_base.h @@ -0,0 +1,100 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/db/commands.h" +#include "mongo/db/operation_context.h" + +namespace mongo { + +/** + * Base class for the killOp command, which attempts to kill a given operation. Contains code + * common to mongos and mongod implementations. + */ +class KillOpCmdBase : public BasicCommand { +public: + KillOpCmdBase() : BasicCommand("killOp") {} + + virtual ~KillOpCmdBase() = default; + + bool supportsWriteConcern(const BSONObj& cmd) const override { + return false; + } + + AllowedOnSecondary secondaryAllowed(ServiceContext*) const override { + return AllowedOnSecondary::kAlways; + } + + bool adminOnly() const override { + return true; + } + + Status checkAuthForCommand(Client* client, + const std::string& dbname, + const BSONObj& cmdObj) const final; + +protected: + /** + * Given an operation ID, search for an OperationContext with that ID. Returns either + * boost::none if no operation with the given ID exists, or the OperationContext along with the + * (acquired) lock for the associated Client. + */ + static boost::optional<std::tuple<stdx::unique_lock<Client>, OperationContext*>> + findOperationContext(ServiceContext* serviceContext, unsigned int opId); + + /** + * Find the given operation, and check if we're authorized to kill it. If the operation is + * found, and we're allowed to kill it, this returns the OperationContext as well as the + * acquired lock for the associated Client. Otherwise boost::none is returned. + */ + static boost::optional<std::tuple<stdx::unique_lock<Client>, OperationContext*>> + findOpForKilling(Client* client, unsigned int opId); + + /** + * Kill an operation running on this instance of mongod or mongos. + */ + static void killLocalOperation(OperationContext* opCtx, unsigned int opToKill); + + /** + * Extract the "op" field from 'cmdObj' and convert the value to unsigned int. Since BSON only + * supports signed number types, and an opId is unsigned, the "op" field of 'cmdObj' may be + * negative, so that it can be stored in a signed type. The conversion back to unsigned is + * taken care of here. + */ + static unsigned int parseOpId(const BSONObj& cmdObj); + + /** + * Return whether the operation being killed is "local" or not. All operations on a mongod are + * local. On a mongos, killOp may may kill an operation on a shard, or an operation "local" to + * the mongos. + * + * Expects to be passed the "op" field of the command object. + */ + static bool isKillingLocalOp(const BSONElement& opElem); +}; + +} // namespace mongo |