summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMisha Tyulenev <misha@mongodb.com>2020-04-16 14:04:31 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-04-23 16:36:41 +0000
commit7ee6f5f0157eea08876c705dcf6e02309d6153da (patch)
treedd69a74576b02be018e9a0afd3e1083a6ae9d1b9
parentc3b47f6e122ae36ff297944a9d90ab9f1151ec17 (diff)
downloadmongo-7ee6f5f0157eea08876c705dcf6e02309d6153da.tar.gz
SERVER-47117 clear maxTimeMSForHedgedReads delay from the getMore on hedge reads
(cherry picked from commit d05acff26c4c5a7fb27e9d0c7e31e4a3167fa7fe)
-rw-r--r--buildscripts/resmokeconfig/suites/causally_consistent_hedged_reads_jscore_passthrough.yml2
-rw-r--r--jstests/sharding/hedged_reads.js39
-rw-r--r--src/mongo/db/command_generic_argument.cpp5
-rw-r--r--src/mongo/db/cursor_manager.cpp5
-rw-r--r--src/mongo/db/operation_context.cpp21
-rw-r--r--src/mongo/db/operation_context.h14
-rw-r--r--src/mongo/db/query/query_request.cpp1
-rw-r--r--src/mongo/db/query/query_request.h1
-rw-r--r--src/mongo/db/service_entry_point_common.cpp72
-rw-r--r--src/mongo/executor/network_interface_tl.cpp31
10 files changed, 140 insertions, 51 deletions
diff --git a/buildscripts/resmokeconfig/suites/causally_consistent_hedged_reads_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/causally_consistent_hedged_reads_jscore_passthrough.yml
index e0d35ae3acb..71fb2fb59ae 100644
--- a/buildscripts/resmokeconfig/suites/causally_consistent_hedged_reads_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/causally_consistent_hedged_reads_jscore_passthrough.yml
@@ -137,7 +137,7 @@ executor:
mongos_options:
set_parameters:
enableTestCommands: 1
- #TODO SERVER-47117
+ #TODO SERVER-47543
maxTimeMSForHedgedReads: 10000
logComponentVerbosity:
verbosity: 0
diff --git a/jstests/sharding/hedged_reads.js b/jstests/sharding/hedged_reads.js
index f1d2e1a97ab..105fed6cb8c 100644
--- a/jstests/sharding/hedged_reads.js
+++ b/jstests/sharding/hedged_reads.js
@@ -74,10 +74,18 @@ const testDB = st.s.getDB(dbName);
const kBlockCmdTimeMS = 5 * 60 * 1000;
const kWaitKillOpTimeoutMS = 5 * 1000;
+const numDocs = 10;
assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
st.ensurePrimaryShard(dbName, st.shard0.shardName);
+let bulk = testDB[collName].initializeUnorderedBulkOp();
+for (let i = 0; i < numDocs; i++) {
+ bulk.insert({x: i});
+}
+assert.commandWorked(bulk.execute());
+assert.commandWorked(st.s.adminCommand({flushRouterConfig: 1}));
+
jsTest.log(
"Verify that maxTimeMS expiration of the additional request does not affect the command result");
// The hedged read will have the maxTimeMS set to 10ms, hence need to sleep longer than that.
@@ -113,6 +121,7 @@ try {
kWaitKillOpTimeoutMS);
} finally {
clearCommandDelay(sortedNodes[0]);
+ clearCommandDelay(sortedNodes[1]);
}
jsTest.log(
@@ -142,8 +151,38 @@ try {
"Timed out waiting for the operation run by the additional request to be killed",
kWaitKillOpTimeoutMS);
} finally {
+ clearCommandDelay(sortedNodes[0]);
clearCommandDelay(sortedNodes[1]);
}
+jsTest.log("Verify that the getMore on hedge request do not inherit maxTimeMS");
+try {
+ assert.commandWorked(st.s.adminCommand({setParameter: 1, maxTimeMSForHedgedReads: 100}));
+
+ // force to open hedge read cursor on sortedNodes[1]
+ setCommandDelay(sortedNodes[0], "find", 100, ns);
+
+ // $where with sleep is used because blocking command via failCommand does not affect the opCtx
+ // deadlines as it blocks and unblocks the command before it starts execution.
+ const comment = "test_getmore_on_additional_request_" + ObjectId();
+ let findRes = assert.commandWorked(testDB.runCommand({
+ find: collName,
+ filter: {$where: "sleep(200); return true;", x: {$gte: 0}},
+ $readPreference: {mode: "nearest"},
+ batchSize: 0,
+ comment: comment
+ }));
+
+ const cursorId = findRes.cursor.id;
+ assert.neq(0, cursorId);
+
+ // confirm that getMore does not time out.
+ let getMoreRes =
+ assert.commandWorked(testDB.runCommand({getMore: cursorId, collection: collName}));
+ assert.eq(getMoreRes.cursor.nextBatch.length, numDocs);
+} finally {
+ clearCommandDelay(sortedNodes[0]);
+}
+
st.stop();
}());
diff --git a/src/mongo/db/command_generic_argument.cpp b/src/mongo/db/command_generic_argument.cpp
index d03b4e64eae..85556c371a6 100644
--- a/src/mongo/db/command_generic_argument.cpp
+++ b/src/mongo/db/command_generic_argument.cpp
@@ -52,7 +52,7 @@ struct SpecialArgRecord {
// If that changes, it should be added. When you add to this list, consider whether you
// should also change the filterCommandRequestForPassthrough() function.
// clang-format off
-static constexpr std::array<SpecialArgRecord, 28> specials{{
+static constexpr std::array<SpecialArgRecord, 29> specials{{
// /-isGeneric
// | /-stripFromRequest
// | | /-stripFromReply
@@ -83,7 +83,8 @@ static constexpr std::array<SpecialArgRecord, 28> specials{{
{"operationTime"_sd, 0, 0, 1},
{"lastCommittedOpTime"_sd, 0, 0, 1},
{"readOnly"_sd, 0, 0, 1},
- {"comment"_sd, 1, 0, 0}}};
+ {"comment"_sd, 1, 0, 0},
+ {"maxTimeMSOpOnly"_sd, 1, 0, 0}}};
// clang-format on
template <bool SpecialArgRecord::*pmo>
diff --git a/src/mongo/db/cursor_manager.cpp b/src/mongo/db/cursor_manager.cpp
index eb7359d9f4d..c24abcdd8b5 100644
--- a/src/mongo/db/cursor_manager.cpp
+++ b/src/mongo/db/cursor_manager.cpp
@@ -367,6 +367,11 @@ ClientCursorPin CursorManager::registerCursor(OperationContext* opCtx,
_opKeyMap.emplace(*opKey, cursorId);
}
+ // Restores the maxTimeMS provided in the cursor generating command in the case it used
+ // maxTimeMSOpOnly. This way the pinned cursor will have the leftover time consistent with the
+ // maxTimeMS.
+ opCtx->restoreMaxTimeMS();
+
return ClientCursorPin(opCtx, unownedCursor, this);
}
diff --git a/src/mongo/db/operation_context.cpp b/src/mongo/db/operation_context.cpp
index 5401f36c20a..260de1bf20b 100644
--- a/src/mongo/db/operation_context.cpp
+++ b/src/mongo/db/operation_context.cpp
@@ -181,6 +181,27 @@ Microseconds OperationContext::getRemainingMaxTimeMicros() const {
return _maxTime - getElapsedTime();
}
+void OperationContext::restoreMaxTimeMS() {
+ if (!_storedMaxTime) {
+ return;
+ }
+
+ auto maxTime = *_storedMaxTime;
+ _storedMaxTime = boost::none;
+
+ if (maxTime <= Microseconds::zero()) {
+ maxTime = Microseconds::max();
+ }
+
+ if (maxTime == Microseconds::max()) {
+ _deadline = Date_t::max();
+ } else {
+ auto clock = getServiceContext()->getFastClockSource();
+ _deadline = clock->now() + clock->getPrecision() + maxTime - _elapsedTime.elapsed();
+ }
+ _maxTime = maxTime;
+}
+
namespace {
// Helper function for checkForInterrupt fail point. Decides whether the operation currently
diff --git a/src/mongo/db/operation_context.h b/src/mongo/db/operation_context.h
index c1e224bfee7..ea565afd52c 100644
--- a/src/mongo/db/operation_context.h
+++ b/src/mongo/db/operation_context.h
@@ -423,6 +423,15 @@ public:
return _exhaust;
}
+ void storeMaxTimeMS(Microseconds maxTime) {
+ _storedMaxTime = maxTime;
+ }
+
+ /**
+ * Restore deadline to match the value stored in _storedMaxTime.
+ */
+ void restoreMaxTimeMS();
+
private:
StatusWith<stdx::cv_status> waitForConditionOrInterruptNoAssertUntil(
stdx::condition_variable& cv, BasicLockableAdapter m, Date_t deadline) noexcept override;
@@ -540,12 +549,15 @@ private:
bool _isExecutingShutdown = false;
// Max operation time requested by the user or by the cursor in the case of a getMore with no
- // user-specified maxTime. This is tracked with microsecond granularity for the purpose of
+ // user-specified maxTimeMS. This is tracked with microsecond granularity for the purpose of
// assigning unused execution time back to a cursor at the end of an operation, only. The
// _deadline and the service context's fast clock are the only values consulted for determining
// if the operation's timelimit has been exceeded.
Microseconds _maxTime = Microseconds::max();
+ // The value of the maxTimeMS requested by user in the case it was overwritten.
+ boost::optional<Microseconds> _storedMaxTime;
+
// Timer counting the elapsed time since the construction of this OperationContext.
Timer _elapsedTime;
diff --git a/src/mongo/db/query/query_request.cpp b/src/mongo/db/query/query_request.cpp
index dd80516535c..f5ac96d3d43 100644
--- a/src/mongo/db/query/query_request.cpp
+++ b/src/mongo/db/query/query_request.cpp
@@ -110,6 +110,7 @@ const char QueryRequest::kInternalReadAtClusterTimeField[] = "$_internalReadAtCl
const char QueryRequest::kRequestResumeTokenField[] = "$_requestResumeToken";
const char QueryRequest::kResumeAfterField[] = "$_resumeAfter";
const char QueryRequest::kUse44SortKeys[] = "_use44SortKeys";
+const char QueryRequest::kMaxTimeMSOpOnlyField[] = "maxTimeMSOpOnly";
// Field names for sorting options.
const char QueryRequest::kNaturalSortField[] = "$natural";
diff --git a/src/mongo/db/query/query_request.h b/src/mongo/db/query/query_request.h
index 3593489cec8..115e8de7b60 100644
--- a/src/mongo/db/query/query_request.h
+++ b/src/mongo/db/query/query_request.h
@@ -80,6 +80,7 @@ public:
static const char kRequestResumeTokenField[];
static const char kResumeAfterField[];
static const char kUse44SortKeys[];
+ static const char kMaxTimeMSOpOnlyField[];
static const char kNaturalSortField[];
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index f773374f44c..0301647444c 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -981,6 +981,7 @@ void execCommandDatabase(OperationContext* opCtx,
std::unique_ptr<MaintenanceModeSetter> mmSetter;
BSONElement cmdOptionMaxTimeMSField;
+ BSONElement maxTimeMSOpOnlyField;
BSONElement allowImplicitCollectionCreationField;
BSONElement helpField;
@@ -989,6 +990,11 @@ void execCommandDatabase(OperationContext* opCtx,
StringData fieldName = element.fieldNameStringData();
if (fieldName == QueryRequest::cmdOptionMaxTimeMS) {
cmdOptionMaxTimeMSField = element;
+ } else if (fieldName == QueryRequest::kMaxTimeMSOpOnlyField) {
+ uassert(ErrorCodes::InvalidOptions,
+ "Can not specify maxTimeMSOpOnly for non internal clients",
+ isInternalClient);
+ maxTimeMSOpOnlyField = element;
} else if (fieldName == "allowImplicitCollectionCreation") {
allowImplicitCollectionCreationField = element;
} else if (fieldName == CommandHelpers::kHelpFieldName) {
@@ -1008,9 +1014,9 @@ void execCommandDatabase(OperationContext* opCtx,
if (CommandHelpers::isHelpRequest(helpField)) {
CurOp::get(opCtx)->ensureStarted();
- // We disable last-error for help requests due to SERVER-11492, because config servers
- // use help requests to determine which commands are database writes, and so must be
- // forwarded to all config servers.
+ // We disable last-error for help requests due to SERVER-11492, because config
+ // servers use help requests to determine which commands are database writes, and so
+ // must be forwarded to all config servers.
LastError::get(opCtx->getClient()).disable();
Command::generateHelpResponse(opCtx, replyBuilder, *command);
return;
@@ -1076,20 +1082,28 @@ void execCommandDatabase(OperationContext* opCtx,
opCounters->gotCommand();
}
- // Parse the 'maxTimeMS' command option, and use it to set a deadline for the operation on
- // the OperationContext. The 'maxTimeMS' option unfortunately has a different meaning for a
- // getMore command, where it is used to communicate the maximum time to wait for new inserts
- // on tailable cursors, not as a deadline for the operation.
+ // Parse the 'maxTimeMS' command option, and use it to set a deadline for the operation
+ // on the OperationContext. The 'maxTimeMS' option unfortunately has a different meaning
+ // for a getMore command, where it is used to communicate the maximum time to wait for
+ // new inserts on tailable cursors, not as a deadline for the operation.
// TODO SERVER-34277 Remove the special handling for maxTimeMS for getMores. This will
- // require introducing a new 'max await time' parameter for getMore, and eventually banning
- // maxTimeMS altogether on a getMore command.
- const int maxTimeMS =
- uassertStatusOK(QueryRequest::parseMaxTimeMS(cmdOptionMaxTimeMSField));
- if (maxTimeMS > 0 && command->getLogicalOp() != LogicalOp::opGetMore) {
+ // require introducing a new 'max await time' parameter for getMore, and eventually
+ // banning maxTimeMS altogether on a getMore command.
+ int maxTimeMS = uassertStatusOK(QueryRequest::parseMaxTimeMS(cmdOptionMaxTimeMSField));
+ int maxTimeMSOpOnly = uassertStatusOK(QueryRequest::parseMaxTimeMS(maxTimeMSOpOnlyField));
+
+ if ((maxTimeMS > 0 || maxTimeMSOpOnly > 0) &&
+ command->getLogicalOp() != LogicalOp::opGetMore) {
uassert(40119,
"Illegal attempt to set operation deadline within DBDirectClient",
!opCtx->getClient()->isInDirectClient());
- opCtx->setDeadlineAfterNowBy(Milliseconds{maxTimeMS}, ErrorCodes::MaxTimeMSExpired);
+ if (maxTimeMSOpOnly > 0 && (maxTimeMS == 0 || maxTimeMSOpOnly < maxTimeMS)) {
+ opCtx->storeMaxTimeMS(Milliseconds{maxTimeMS});
+ opCtx->setDeadlineAfterNowBy(Milliseconds{maxTimeMSOpOnly},
+ ErrorCodes::MaxTimeMSExpired);
+ } else if (maxTimeMS > 0) {
+ opCtx->setDeadlineAfterNowBy(Milliseconds{maxTimeMS}, ErrorCodes::MaxTimeMSExpired);
+ }
}
auto& readConcernArgs = repl::ReadConcernArgs::get(opCtx);
@@ -1130,8 +1144,8 @@ void execCommandDatabase(OperationContext* opCtx,
opCtx->lockState()->setShouldConflictWithSecondaryBatchApplication(false);
}
- // Remember whether or not this operation is starting a transaction, in case something later
- // in the execution needs to adjust its behavior based on this.
+ // Remember whether or not this operation is starting a transaction, in case something
+ // later in the execution needs to adjust its behavior based on this.
opCtx->setIsStartingMultiDocumentTransaction(startTransaction);
auto& oss = OperationShardingState::get(opCtx);
@@ -1156,9 +1170,10 @@ void execCommandDatabase(OperationContext* opCtx,
// This may trigger the maxTimeAlwaysTimeOut failpoint.
auto status = opCtx->checkForInterruptNoAssert();
- // We still proceed if the primary stepped down, but accept other kinds of interruptions.
- // We defer to individual commands to allow themselves to be interruptible by stepdowns,
- // since commands like 'voteRequest' should conversely continue executing.
+ // We still proceed if the primary stepped down, but accept other kinds of
+ // interruptions. We defer to individual commands to allow themselves to be
+ // interruptible by stepdowns, since commands like 'voteRequest' should conversely
+ // continue executing.
if (status != ErrorCodes::PrimarySteppedDown &&
status != ErrorCodes::InterruptedDueToReplStateChange) {
uassertStatusOK(status);
@@ -1208,10 +1223,10 @@ void execCommandDatabase(OperationContext* opCtx,
auto engine = opCtx->getServiceContext()->getStorageEngine();
invariant(engine && engine->supportsReadConcernSnapshot());
- // SnapshotTooOld errors indicate that PIT ops are failing to find an available snapshot
- // at their specified atClusterTime. Therefore, we'll try to increase the snapshot
- // history window that the storage engine maintains in order to increase the likelihood
- // of successful future PIT atClusterTime requests.
+ // SnapshotTooOld errors indicate that PIT ops are failing to find an available
+ // snapshot at their specified atClusterTime. Therefore, we'll try to increase the
+ // snapshot history window that the storage engine maintains in order to increase
+ // the likelihood of successful future PIT atClusterTime requests.
SnapshotWindowUtil::incrementSnapshotTooOldErrorCount();
SnapshotWindowUtil::increaseTargetSnapshotWindowSize(opCtx);
} else {
@@ -1349,7 +1364,8 @@ DbResponse receivedCommands(OperationContext* opCtx,
ServiceEntryPointCommon::getRedactedCopyForLogging(c, request.body)));
{
- // Try to set this as early as possible, as soon as we have figured out the command.
+ // Try to set this as early as possible, as soon as we have figured out the
+ // command.
stdx::lock_guard<Client> lk(*opCtx->getClient());
CurOp::get(opCtx)->setLogicalOp_inlock(c->getLogicalOp());
}
@@ -1588,12 +1604,12 @@ DbResponse receivedGetMore(OperationContext* opCtx,
// Make sure that killCursorGlobal does not throw an exception if it is interrupted.
UninterruptibleLockGuard noInterrupt(opCtx->lockState());
- // If an error was thrown prior to auth checks, then the cursor should remain alive in
- // order to prevent an unauthorized user from resulting in the death of a cursor. In
- // other error cases, the cursor is dead and should be cleaned up.
+ // If an error was thrown prior to auth checks, then the cursor should remain alive
+ // in order to prevent an unauthorized user from resulting in the death of a cursor.
+ // In other error cases, the cursor is dead and should be cleaned up.
//
- // If killing the cursor fails, ignore the error and don't try again. The cursor should
- // be reaped by the client cursor timeout thread.
+ // If killing the cursor fails, ignore the error and don't try again. The cursor
+ // should be reaped by the client cursor timeout thread.
CursorManager::get(opCtx)
->killCursor(opCtx, cursorid, false /* shouldAudit */)
.ignore();
diff --git a/src/mongo/executor/network_interface_tl.cpp b/src/mongo/executor/network_interface_tl.cpp
index ea058794a53..b00c1024e74 100644
--- a/src/mongo/executor/network_interface_tl.cpp
+++ b/src/mongo/executor/network_interface_tl.cpp
@@ -47,7 +47,7 @@ namespace mongo {
namespace executor {
namespace {
-static inline const std::string kMaxTimeMSOptionName = "maxTimeMS";
+static inline const std::string kMaxTimeMSOpOnlyField = "maxTimeMSOpOnly";
} // unnamed namespace
/**
@@ -704,26 +704,19 @@ void NetworkInterfaceTL::RequestManager::trySend(
if (requestState->isHedge) {
invariant(cmdStatePtr->requestOnAny.hedgeOptions);
-
- // Attach a maxTimeMS to the request.
auto maxTimeMS = request.hedgeOptions->maxTimeMSForHedgedReads;
- if (request.timeout == request.kNoTimeout || request.timeout > Milliseconds(maxTimeMS)) {
- BSONObjBuilder updatedCmdBuilder;
- for (const auto& elem : request.cmdObj) {
- if (elem.fieldNameStringData() != kMaxTimeMSOptionName) {
- updatedCmdBuilder.append(elem);
- }
- }
- updatedCmdBuilder.append(kMaxTimeMSOptionName, maxTimeMS);
- request.cmdObj = updatedCmdBuilder.obj();
- LOGV2_DEBUG(4647200,
- 2,
- "Set maxTimeMS for request",
- "maxTimeMS"_attr = maxTimeMS,
- "request_id"_attr = cmdStatePtr->requestOnAny.id,
- "target"_attr = cmdStatePtr->requestOnAny.target[idx]);
- }
+ BSONObjBuilder updatedCmdBuilder;
+ updatedCmdBuilder.appendElements(request.cmdObj);
+ updatedCmdBuilder.append(kMaxTimeMSOpOnlyField, maxTimeMS);
+ request.cmdObj = updatedCmdBuilder.obj();
+
+ LOGV2_DEBUG(4647200,
+ 2,
+ "Setup hedge request",
+ "request_id"_attr = cmdStatePtr->requestOnAny.id,
+ "request"_attr = redact(request.toString()),
+ "target"_attr = cmdStatePtr->requestOnAny.target[idx]);
if (cmdStatePtr->interface->_svcCtx) {
auto hm = HedgingMetrics::get(cmdStatePtr->interface->_svcCtx);