diff options
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/SConscript | 11 | ||||
-rw-r--r-- | src/mongo/db/commands/SConscript | 7 | ||||
-rw-r--r-- | src/mongo/db/commands/find_cmd.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/commands/getmore_cmd.cpp | 85 | ||||
-rw-r--r-- | src/mongo/db/commands/kill_op.cpp | 97 | ||||
-rw-r--r-- | src/mongo/db/commands/kill_op_cmd_base.cpp | 132 | ||||
-rw-r--r-- | src/mongo/db/commands/kill_op_cmd_base.h | 100 | ||||
-rw-r--r-- | src/mongo/db/curop_failpoint_helpers.cpp | 78 | ||||
-rw-r--r-- | src/mongo/db/curop_failpoint_helpers.h | 51 | ||||
-rw-r--r-- | src/mongo/db/query/find_common.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/query/find_common.h | 3 | ||||
-rw-r--r-- | src/mongo/s/commands/SConscript | 2 | ||||
-rw-r--r-- | src/mongo/s/commands/cluster_kill_op.cpp | 49 | ||||
-rw-r--r-- | src/mongo/s/query/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/s/query/cluster_find.cpp | 5 |
15 files changed, 450 insertions, 179 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index 1cc6e1ba889..de154654219 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -210,6 +210,17 @@ env.Library( ], ) +env.Library( + target='curop_failpoint_helpers', + source=[ + 'curop_failpoint_helpers.cpp', + ], + LIBDEPS=[ + 'curop', + '$BUILD_DIR/mongo/util/fail_point', + ], +) + env.CppUnitTest( target='operation_context_test', source=[ diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript index b07fd756abf..fb60cfb4c04 100644 --- a/src/mongo/db/commands/SConscript +++ b/src/mongo/db/commands/SConscript @@ -215,6 +215,7 @@ env.Library( '$BUILD_DIR/mongo/db/catalog/index_key_validate', '$BUILD_DIR/mongo/db/cloner', '$BUILD_DIR/mongo/db/command_can_run_here', + '$BUILD_DIR/mongo/db/curop_failpoint_helpers', '$BUILD_DIR/mongo/db/exec/stagedebug_cmd', '$BUILD_DIR/mongo/db/index/index_access_method', '$BUILD_DIR/mongo/db/index_d', @@ -232,7 +233,7 @@ env.Library( 'core', 'current_op_common', 'fsync_locked', - 'killcursors_common', + 'kill_common', 'list_collections_filter', 'profile_common', 'test_commands_enabled', @@ -281,6 +282,7 @@ env.Library( '$BUILD_DIR/mongo/db/rw_concern_d', '$BUILD_DIR/mongo/db/s/sharding_catalog_manager', '$BUILD_DIR/mongo/s/sharding_legacy_api', + 'kill_common', 'mongod_fcv', 'mongod_fsync', 'standalone', @@ -290,9 +292,10 @@ env.Library( ) env.Library( - target='killcursors_common', + target='kill_common', source=[ 'killcursors_common.cpp', + 'kill_op_cmd_base.cpp' ], LIBDEPS=[ '$BUILD_DIR/mongo/db/audit', diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp index 90806185039..875638e46c2 100644 --- a/src/mongo/db/commands/find_cmd.cpp +++ b/src/mongo/db/commands/find_cmd.cpp @@ -35,6 +35,7 @@ #include "mongo/db/clientcursor.h" #include "mongo/db/commands.h" #include "mongo/db/commands/run_aggregate.h" +#include "mongo/db/curop_failpoint_helpers.h" #include "mongo/db/db_raii.h" #include "mongo/db/exec/working_set_common.h" #include "mongo/db/matcher/extensions_callback_real.h" @@ -329,6 +330,9 @@ public: return true; } + CurOpFailpointHelpers::waitWhileFailPointEnabled( + &waitInFindBeforeMakingBatch, opCtx, "waitInFindBeforeMakingBatch"); + const QueryRequest& originalQR = exec->getCanonicalQuery()->getQueryRequest(); // Stream query results, adding them to a BSONArray as we go. diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp index 6674092b823..45595a597ef 100644 --- a/src/mongo/db/commands/getmore_cmd.cpp +++ b/src/mongo/db/commands/getmore_cmd.cpp @@ -40,6 +40,7 @@ #include "mongo/db/clientcursor.h" #include "mongo/db/commands.h" #include "mongo/db/curop.h" +#include "mongo/db/curop_failpoint_helpers.h" #include "mongo/db/cursor_manager.h" #include "mongo/db/db_raii.h" #include "mongo/db/exec/working_set_common.h" @@ -67,48 +68,6 @@ namespace { MONGO_FP_DECLARE(rsStopGetMoreCmd); -// Helper function which sets the 'msg' field of the opCtx's CurOp to the specified string, and -// returns the original value of the field. -std::string updateCurOpMsg(OperationContext* opCtx, const std::string& newMsg) { - stdx::lock_guard<Client> lk(*opCtx->getClient()); - auto oldMsg = CurOp::get(opCtx)->getMessage(); - CurOp::get(opCtx)->setMessage_inlock(newMsg.c_str()); - return oldMsg; -} - -// This helper function works much like MONGO_FAIL_POINT_PAUSE_WHILE_SET, but it additionally -// releases and re-acquires the collection readLock at regular intervals, in order to avoid -// deadlocks caused by the pinned-cursor failpoints in this file (see SERVER-21997). Finally, it -// also sets the 'msg' field of the opCtx's CurOp to the given string while the failpoint is active. -void waitWhileFailPointEnabled(FailPoint* failPoint, - OperationContext* opCtx, - const NamespaceString& nss, - const std::string& curOpMsg, - boost::optional<AutoGetCollectionForRead>* readLock) { - invariant(failPoint); - auto origCurOpMsg = updateCurOpMsg(opCtx, curOpMsg); - - MONGO_FAIL_POINT_BLOCK((*failPoint), options) { - const BSONObj& data = options.getData(); - const bool shouldCheckForInterrupt = data["shouldCheckForInterrupt"].booleanSafe(); - while (MONGO_FAIL_POINT((*failPoint))) { - sleepFor(Milliseconds(10)); - if (readLock && *readLock) { - readLock->reset(); - readLock->emplace(opCtx, nss); - } - - // Check for interrupt so that an operation can be killed while waiting for the - // failpoint to be disabled, if the failpoint is configured to be interruptible. - if (shouldCheckForInterrupt) { - opCtx->checkForInterrupt(); - } - } - } - - updateCurOpMsg(opCtx, origCurOpMsg); -} - /** * Validates that the lsid of 'opCtx' matches that of 'cursor'. This must be called after * authenticating, so that it is safe to report the lsid of 'cursor'. @@ -331,15 +290,23 @@ public: ClientCursor* cursor = ccPin.getValue().getCursor(); + // Only used by the failpoints. + const auto dropAndReaquireReadLock = [&readLock, opCtx, &request]() { + readLock.reset(); + readLock.emplace(opCtx, request.nss); + }; + // If the 'waitAfterPinningCursorBeforeGetMoreBatch' fail point is enabled, set the 'msg' - // field of this operation's CurOp to signal that we've hit this point and then spin until - // the failpoint is released. + // field of this operation's CurOp to signal that we've hit this point and then repeatedly + // release and re-acquire the collection readLock at regular intervals until the failpoint + // is released. This is done in order to avoid deadlocks caused by the pinned-cursor + // failpoints in this file (see SERVER-21997). if (MONGO_FAIL_POINT(waitAfterPinningCursorBeforeGetMoreBatch)) { - waitWhileFailPointEnabled(&waitAfterPinningCursorBeforeGetMoreBatch, - opCtx, - request.nss, - "waitAfterPinningCursorBeforeGetMoreBatch", - &readLock); + CurOpFailpointHelpers::waitWhileFailPointEnabled( + &waitAfterPinningCursorBeforeGetMoreBatch, + opCtx, + "waitAfterPinningCursorBeforeGetMoreBatch", + dropAndReaquireReadLock); } // A user can only call getMore on their own cursor. If there were multiple users @@ -468,11 +435,11 @@ public: // operation's CurOp to signal that we've hit this point and then spin until the failpoint // is released. if (MONGO_FAIL_POINT(waitWithPinnedCursorDuringGetMoreBatch)) { - waitWhileFailPointEnabled(&waitWithPinnedCursorDuringGetMoreBatch, - opCtx, - request.nss, - "waitWithPinnedCursorDuringGetMoreBatch", - &readLock); + CurOpFailpointHelpers::waitWhileFailPointEnabled( + &waitWithPinnedCursorDuringGetMoreBatch, + opCtx, + "waitWithPinnedCursorDuringGetMoreBatch", + dropAndReaquireReadLock); } Status batchStatus = generateBatch(opCtx, cursor, request, &nextBatch, &state, &numResults); @@ -525,11 +492,11 @@ public: // failpoint is active, set the 'msg' field of this operation's CurOp to signal that we've // hit this point and then spin until the failpoint is released. if (MONGO_FAIL_POINT(waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch)) { - waitWhileFailPointEnabled(&waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch, - opCtx, - request.nss, - "waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch", - &readLock); + CurOpFailpointHelpers::waitWhileFailPointEnabled( + &waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch, + opCtx, + "waitBeforeUnpinningOrDeletingCursorAfterGetMoreBatch", + dropAndReaquireReadLock); } return true; diff --git a/src/mongo/db/commands/kill_op.cpp b/src/mongo/db/commands/kill_op.cpp index e6278064654..604a16262da 100644 --- a/src/mongo/db/commands/kill_op.cpp +++ b/src/mongo/db/commands/kill_op.cpp @@ -39,6 +39,7 @@ #include "mongo/db/auth/authorization_session.h" #include "mongo/db/client.h" #include "mongo/db/commands.h" +#include "mongo/db/commands/kill_op_cmd_base.h" #include "mongo/db/operation_context.h" #include "mongo/db/service_context.h" #include "mongo/util/log.h" @@ -46,104 +47,20 @@ namespace mongo { -class KillOpCommand : public BasicCommand { +class KillOpCommand : public KillOpCmdBase { public: - KillOpCommand() : BasicCommand("killOp") {} - - - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { - return false; - } - - AllowedOnSecondary secondaryAllowed(ServiceContext*) const override { - return AllowedOnSecondary::kAlways; - } - - bool adminOnly() const final { - return true; - } - - static long long parseOpId(const BSONObj& cmdObj) { - long long op; - uassertStatusOK(bsonExtractIntegerField(cmdObj, "op", &op)); - - // Internally opid is an unsigned 32-bit int, but as BSON only has signed integer types, - // we wrap values exceeding 2,147,483,647 to negative numbers. The following undoes this - // transformation, so users can use killOp on the (negative) opid they received. - if (op >= std::numeric_limits<int>::min() && op < 0) - op += 1ull << 32; - - uassert(26823, - str::stream() << "invalid op : " << op, - (op >= 0) && (op <= std::numeric_limits<unsigned int>::max())); - - - return op; - } - - static StatusWith<std::tuple<stdx::unique_lock<Client>, OperationContext*>> _findOp( - Client* client, unsigned int opId) { - AuthorizationSession* authzSession = AuthorizationSession::get(client); - - for (ServiceContext::LockedClientsCursor cursor(client->getServiceContext()); - Client* opClient = cursor.next();) { - stdx::unique_lock<Client> lk(*opClient); - - OperationContext* opCtx = opClient->getOperationContext(); - if (opCtx && opCtx->getOpID() == opId) { - if (authzSession->isAuthorizedForActionsOnResource( - ResourcePattern::forClusterResource(), ActionType::killop) || - authzSession->isCoauthorizedWithClient(opClient)) { - return {std::make_tuple(std::move(lk), opCtx)}; - } - break; - } - } - - return Status(ErrorCodes::NoSuchKey, str::stream() << "Could not access opID: " << opId); - } - - Status checkAuthForCommand(Client* client, - const std::string& dbname, - const BSONObj& cmdObj) const final { - AuthorizationSession* authzSession = AuthorizationSession::get(client); - - if (authzSession->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(), - ActionType::killop)) { - // If we have administrative permission to run killop, we don't need to traverse the - // Client list to figure out if we own the operation which will be terminated. - return Status::OK(); - } - - bool isAuthenticated = - AuthorizationSession::get(client)->getAuthenticatedUserNames().more(); - if (isAuthenticated) { - long long opId = parseOpId(cmdObj); - auto swLkAndOp = _findOp(client, opId); - if (swLkAndOp.isOK()) { - // We were able to find the Operation, and we were authorized to interact with it. - return Status::OK(); - } - } - return Status(ErrorCodes::Unauthorized, "Unauthorized"); - } - bool run(OperationContext* opCtx, const std::string& db, const BSONObj& cmdObj, BSONObjBuilder& result) final { - long long opId = parseOpId(cmdObj); + long long opId = KillOpCmdBase::parseOpId(cmdObj); - log() << "going to kill op: " << opId; + // Used by tests to check if auth checks passed. result.append("info", "attempting to kill op"); - auto swLkAndOp = _findOp(opCtx->getClient(), opId); - if (swLkAndOp.isOK()) { - stdx::unique_lock<Client> lk; - OperationContext* opCtxToKill; - std::tie(lk, opCtxToKill) = std::move(swLkAndOp.getValue()); - opCtx->getServiceContext()->killOperation(opCtxToKill); - } + log() << "going to kill op: " << opId; + KillOpCmdBase::killLocalOperation(opCtx, opId); + // killOp always reports success once past the auth check. return true; } } killOpCmd; diff --git a/src/mongo/db/commands/kill_op_cmd_base.cpp b/src/mongo/db/commands/kill_op_cmd_base.cpp new file mode 100644 index 00000000000..50035b03eea --- /dev/null +++ b/src/mongo/db/commands/kill_op_cmd_base.cpp @@ -0,0 +1,132 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/commands/kill_op_cmd_base.h" + +#include "mongo/bson/util/bson_extract.h" +#include "mongo/db/audit.h" +#include "mongo/db/auth/authorization_session.h" +#include "mongo/db/client.h" +#include "mongo/db/operation_context.h" +#include "mongo/util/mongoutils/str.h" + +namespace mongo { + +Status KillOpCmdBase::checkAuthForCommand(Client* client, + const std::string& dbname, + const BSONObj& cmdObj) const { + AuthorizationSession* authzSession = AuthorizationSession::get(client); + + if (authzSession->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(), + ActionType::killop)) { + // If we have administrative permission to run killop, we don't need to traverse the + // Client list to figure out if we own the operation which will be terminated. + return Status::OK(); + } + + bool isAuthenticated = AuthorizationSession::get(client)->getAuthenticatedUserNames().more(); + if (isAuthenticated && isKillingLocalOp(cmdObj.getField("op"))) { + // Look up the OperationContext and see if we have permission to kill it. This is done once + // here and again in the command body. The check here in the checkAuthForCommand() function + // is necessary because if the check fails, it will be picked up by the auditing system. + long long opId = parseOpId(cmdObj); + auto lkAndOp = KillOpCmdBase::findOpForKilling(client, opId); + if (lkAndOp) { + // We were able to find the Operation, and we were authorized to interact with it. + return Status::OK(); + } + } + return Status(ErrorCodes::Unauthorized, "Unauthorized"); +} + + +bool KillOpCmdBase::isKillingLocalOp(const BSONElement& opElem) { + return opElem.isNumber(); +} + +boost::optional<std::tuple<stdx::unique_lock<Client>, OperationContext*>> +KillOpCmdBase::findOperationContext(ServiceContext* serviceContext, unsigned int opId) { + for (ServiceContext::LockedClientsCursor cursor(serviceContext); + Client* opClient = cursor.next();) { + stdx::unique_lock<Client> lk(*opClient); + + OperationContext* opCtx = opClient->getOperationContext(); + if (opCtx && opCtx->getOpID() == opId) { + return {std::make_tuple(std::move(lk), opCtx)}; + } + } + + return boost::none; +} + +boost::optional<std::tuple<stdx::unique_lock<Client>, OperationContext*>> +KillOpCmdBase::findOpForKilling(Client* client, unsigned int opId) { + AuthorizationSession* authzSession = AuthorizationSession::get(client); + + auto lockAndOpCtx = findOperationContext(client->getServiceContext(), opId); + if (lockAndOpCtx) { + OperationContext* opToKill = std::get<1>(*lockAndOpCtx); + if (authzSession->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(), + ActionType::killop) || + authzSession->isCoauthorizedWithClient(opToKill->getClient())) { + return lockAndOpCtx; + } + } + + return boost::none; +} + +void KillOpCmdBase::killLocalOperation(OperationContext* opCtx, unsigned int opToKill) { + stdx::unique_lock<Client> lk; + OperationContext* opCtxToKill; + auto lockAndOpCtx = findOpForKilling(opCtx->getClient(), opToKill); + if (!lockAndOpCtx) { + // killOp always reports success past the auth check. + return; + } + + std::tie(lk, opCtxToKill) = std::move(*lockAndOpCtx); + + invariant(lk); + opCtx->getServiceContext()->killOperation(opCtxToKill); +} + +unsigned int KillOpCmdBase::parseOpId(const BSONObj& cmdObj) { + long long op; + uassertStatusOK(bsonExtractIntegerField(cmdObj, "op", &op)); + + uassert(26823, + str::stream() << "invalid op : " << op << ". Op ID cannot be represented with 32 bits", + (op >= std::numeric_limits<int>::min()) && (op <= std::numeric_limits<int>::max())); + + return static_cast<unsigned int>(op); +} + +} // namespace mongo diff --git a/src/mongo/db/commands/kill_op_cmd_base.h b/src/mongo/db/commands/kill_op_cmd_base.h new file mode 100644 index 00000000000..ad4d2f8abfd --- /dev/null +++ b/src/mongo/db/commands/kill_op_cmd_base.h @@ -0,0 +1,100 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/db/commands.h" +#include "mongo/db/operation_context.h" + +namespace mongo { + +/** + * Base class for the killOp command, which attempts to kill a given operation. Contains code + * common to mongos and mongod implementations. + */ +class KillOpCmdBase : public BasicCommand { +public: + KillOpCmdBase() : BasicCommand("killOp") {} + + virtual ~KillOpCmdBase() = default; + + bool supportsWriteConcern(const BSONObj& cmd) const override { + return false; + } + + AllowedOnSecondary secondaryAllowed(ServiceContext*) const override { + return AllowedOnSecondary::kAlways; + } + + bool adminOnly() const override { + return true; + } + + Status checkAuthForCommand(Client* client, + const std::string& dbname, + const BSONObj& cmdObj) const final; + +protected: + /** + * Given an operation ID, search for an OperationContext with that ID. Returns either + * boost::none if no operation with the given ID exists, or the OperationContext along with the + * (acquired) lock for the associated Client. + */ + static boost::optional<std::tuple<stdx::unique_lock<Client>, OperationContext*>> + findOperationContext(ServiceContext* serviceContext, unsigned int opId); + + /** + * Find the given operation, and check if we're authorized to kill it. If the operation is + * found, and we're allowed to kill it, this returns the OperationContext as well as the + * acquired lock for the associated Client. Otherwise boost::none is returned. + */ + static boost::optional<std::tuple<stdx::unique_lock<Client>, OperationContext*>> + findOpForKilling(Client* client, unsigned int opId); + + /** + * Kill an operation running on this instance of mongod or mongos. + */ + static void killLocalOperation(OperationContext* opCtx, unsigned int opToKill); + + /** + * Extract the "op" field from 'cmdObj' and convert the value to unsigned int. Since BSON only + * supports signed number types, and an opId is unsigned, the "op" field of 'cmdObj' may be + * negative, so that it can be stored in a signed type. The conversion back to unsigned is + * taken care of here. + */ + static unsigned int parseOpId(const BSONObj& cmdObj); + + /** + * Return whether the operation being killed is "local" or not. All operations on a mongod are + * local. On a mongos, killOp may may kill an operation on a shard, or an operation "local" to + * the mongos. + * + * Expects to be passed the "op" field of the command object. + */ + static bool isKillingLocalOp(const BSONElement& opElem); +}; + +} // namespace mongo diff --git a/src/mongo/db/curop_failpoint_helpers.cpp b/src/mongo/db/curop_failpoint_helpers.cpp new file mode 100644 index 00000000000..4a6b5fbe7d1 --- /dev/null +++ b/src/mongo/db/curop_failpoint_helpers.cpp @@ -0,0 +1,78 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/curop_failpoint_helpers.h" + +#include "mongo/db/curop.h" + + +namespace mongo { + +namespace { + +// Helper function which sets the 'msg' field of the opCtx's CurOp to the specified string, and +// returns the original value of the field. +std::string updateCurOpMsg(OperationContext* opCtx, const std::string& newMsg) { + stdx::lock_guard<Client> lk(*opCtx->getClient()); + auto oldMsg = CurOp::get(opCtx)->getMessage(); + CurOp::get(opCtx)->setMessage_inlock(newMsg.c_str()); + return oldMsg; +} + +} // namespace + +void CurOpFailpointHelpers::waitWhileFailPointEnabled( + FailPoint* failPoint, + OperationContext* opCtx, + const std::string& curOpMsg, + const std::function<void(void)>& whileWaiting) { + invariant(failPoint); + auto origCurOpMsg = updateCurOpMsg(opCtx, curOpMsg); + + MONGO_FAIL_POINT_BLOCK((*failPoint), options) { + const BSONObj& data = options.getData(); + const bool shouldCheckForInterrupt = data["shouldCheckForInterrupt"].booleanSafe(); + while (MONGO_FAIL_POINT((*failPoint))) { + sleepFor(Milliseconds(10)); + if (whileWaiting) { + whileWaiting(); + } + + // Check for interrupt so that an operation can be killed while waiting for the + // failpoint to be disabled, if the failpoint is configured to be interruptible. + if (shouldCheckForInterrupt) { + opCtx->checkForInterrupt(); + } + } + } + + updateCurOpMsg(opCtx, origCurOpMsg); +} +} diff --git a/src/mongo/db/curop_failpoint_helpers.h b/src/mongo/db/curop_failpoint_helpers.h new file mode 100644 index 00000000000..47e05a93eed --- /dev/null +++ b/src/mongo/db/curop_failpoint_helpers.h @@ -0,0 +1,51 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/bson/bsonobj.h" +#include "mongo/db/operation_context.h" +#include "mongo/util/fail_point_service.h" + +namespace mongo { + +class CurOpFailpointHelpers { +public: + /** + * This helper function works much like MONGO_FAIL_POINT_PAUSE_WHILE_SET, but additionally + * calls whileWaiting() at regular intervals. Finally, it also sets the 'msg' field of the + * opCtx's CurOp to the given string while the failpoint is active. + * + * whileWaiting() may be used to do anything the caller needs done while hanging in the + * failpoint. For example, the caller may use whileWaiting() to release and reacquire locks in + * order to avoid deadlocks. + */ + static void waitWhileFailPointEnabled(FailPoint* failPoint, + OperationContext* opCtx, + const std::string& curOpMsg, + const std::function<void(void)>& whileWaiting = nullptr); +}; +} diff --git a/src/mongo/db/query/find_common.cpp b/src/mongo/db/query/find_common.cpp index affea8b8967..314deea5ae7 100644 --- a/src/mongo/db/query/find_common.cpp +++ b/src/mongo/db/query/find_common.cpp @@ -31,11 +31,14 @@ #include "mongo/db/query/find_common.h" #include "mongo/bson/bsonobj.h" +#include "mongo/db/curop.h" #include "mongo/db/query/query_request.h" #include "mongo/util/assert_util.h" namespace mongo { +MONGO_FP_DECLARE(waitInFindBeforeMakingBatch); + MONGO_FP_DECLARE(disableAwaitDataForGetMoreCmd); MONGO_FP_DECLARE(waitAfterPinningCursorBeforeGetMoreBatch); @@ -85,5 +88,4 @@ BSONObj FindCommon::transformSortSpec(const BSONObj& sortSpec) { return comparatorBob.obj(); } - } // namespace mongo diff --git a/src/mongo/db/query/find_common.h b/src/mongo/db/query/find_common.h index 6aaa8e390ff..cc84a9801b7 100644 --- a/src/mongo/db/query/find_common.h +++ b/src/mongo/db/query/find_common.h @@ -54,6 +54,9 @@ extern const OperationContext::Decoration<AwaitDataState> awaitDataState; class BSONObj; class QueryRequest; +// Failpoint for making find hang. +MONGO_FP_FORWARD_DECLARE(waitInFindBeforeMakingBatch); + // Failpoint for making getMore not wait for an awaitdata cursor. Allows us to avoid waiting during // tests. MONGO_FP_FORWARD_DECLARE(disableAwaitDataForGetMoreCmd); diff --git a/src/mongo/s/commands/SConscript b/src/mongo/s/commands/SConscript index 463f91edb7c..dc260353030 100644 --- a/src/mongo/s/commands/SConscript +++ b/src/mongo/s/commands/SConscript @@ -97,7 +97,7 @@ env.Library( '$BUILD_DIR/mongo/db/commands/current_op_common', '$BUILD_DIR/mongo/db/commands/servers', '$BUILD_DIR/mongo/db/commands/feature_compatibility_version_parser', - '$BUILD_DIR/mongo/db/commands/killcursors_common', + '$BUILD_DIR/mongo/db/commands/kill_common', '$BUILD_DIR/mongo/db/commands/profile_common', '$BUILD_DIR/mongo/db/commands/test_commands_enabled', '$BUILD_DIR/mongo/db/commands/write_commands_common', diff --git a/src/mongo/s/commands/cluster_kill_op.cpp b/src/mongo/s/commands/cluster_kill_op.cpp index 7f942eb870a..795f2005460 100644 --- a/src/mongo/s/commands/cluster_kill_op.cpp +++ b/src/mongo/s/commands/cluster_kill_op.cpp @@ -41,6 +41,7 @@ #include "mongo/db/audit.h" #include "mongo/db/auth/authorization_session.h" #include "mongo/db/commands.h" +#include "mongo/db/commands/kill_op_cmd_base.h" #include "mongo/rpc/metadata.h" #include "mongo/s/client/shard.h" #include "mongo/s/client/shard_registry.h" @@ -51,40 +52,37 @@ namespace mongo { namespace { -class ClusterKillOpCommand : public BasicCommand { +class ClusterKillOpCommand : public KillOpCmdBase { public: - ClusterKillOpCommand() : BasicCommand("killOp") {} - + bool run(OperationContext* opCtx, + const std::string& db, + const BSONObj& cmdObj, + BSONObjBuilder& result) final { - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { - return false; - } + BSONElement element = cmdObj.getField("op"); + uassert(50759, "Did not provide \"op\" field", element.ok()); - AllowedOnSecondary secondaryAllowed(ServiceContext*) const final { - return AllowedOnSecondary::kAlways; - } + if (isKillingLocalOp(element)) { + const unsigned int opId = KillOpCmdBase::parseOpId(cmdObj); + killLocalOperation(opCtx, opId); - bool adminOnly() const final { - return true; - } + // killOp always reports success once past the auth check. + return true; + } else if (element.type() == BSONType::String) { + // It's a string. Should be of the form shardid:opid. + return killShardOperation(opCtx, element.str(), result); + } - Status checkAuthForCommand(Client* client, - const std::string& dbname, - const BSONObj& cmdObj) const final { - bool isAuthorized = AuthorizationSession::get(client)->isAuthorizedForActionsOnResource( - ResourcePattern::forClusterResource(), ActionType::killop); - return isAuthorized ? Status::OK() : Status(ErrorCodes::Unauthorized, "Unauthorized"); + uasserted(50760, + str::stream() << "\"op\" field was of unsupported type " << element.type()); } - bool run(OperationContext* opCtx, - const std::string& db, - const BSONObj& cmdObj, - BSONObjBuilder& result) final { +private: + bool killShardOperation(OperationContext* opCtx, + const std::string& opToKill, + BSONObjBuilder& result) { // The format of op is shardid:opid // This is different than the format passed to the mongod killOp command. - std::string opToKill; - uassertStatusOK(bsonExtractStringField(cmdObj, "op", &opToKill)); - const auto opSepPos = opToKill.find(':'); uassert(28625, @@ -123,7 +121,6 @@ public: // whether the shard reported success or not. return true; } - } clusterKillOpCommand; } // namespace diff --git a/src/mongo/s/query/SConscript b/src/mongo/s/query/SConscript index 86f33dfc054..d9148577f5a 100644 --- a/src/mongo/s/query/SConscript +++ b/src/mongo/s/query/SConscript @@ -13,6 +13,7 @@ env.Library( LIBDEPS=[ '$BUILD_DIR/mongo/db/commands', '$BUILD_DIR/mongo/db/curop', + '$BUILD_DIR/mongo/db/curop_failpoint_helpers', '$BUILD_DIR/mongo/db/query/query_common', '$BUILD_DIR/mongo/s/commands/shared_cluster_commands', "cluster_client_cursor", diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp index c00ee5049a6..7b9d1b01b7e 100644 --- a/src/mongo/s/query/cluster_find.cpp +++ b/src/mongo/s/query/cluster_find.cpp @@ -42,6 +42,7 @@ #include "mongo/db/auth/authorization_session.h" #include "mongo/db/commands.h" #include "mongo/db/curop.h" +#include "mongo/db/curop_failpoint_helpers.h" #include "mongo/db/logical_clock.h" #include "mongo/db/query/canonical_query.h" #include "mongo/db/query/find_common.h" @@ -56,6 +57,7 @@ #include "mongo/s/query/async_results_merger.h" #include "mongo/s/query/cluster_client_cursor_impl.h" #include "mongo/s/query/cluster_cursor_manager.h" +#include "mongo/s/query/cluster_query_knobs.h" #include "mongo/s/query/establish_cursors.h" #include "mongo/s/query/store_possible_cursor.h" #include "mongo/s/stale_exception.h" @@ -300,6 +302,9 @@ CursorId runQueryWithoutRetrying(OperationContext* opCtx, // Retrieve enough data from the ClusterClientCursor for the first batch of results. + CurOpFailpointHelpers::waitWhileFailPointEnabled( + &waitInFindBeforeMakingBatch, opCtx, "waitInFindBeforeMakingBatch"); + auto cursorState = ClusterCursorManager::CursorState::NotExhausted; int bytesBuffered = 0; |