diff options
author | Misha Tyulenev <misha@mongodb.com> | 2020-04-16 14:04:31 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-04-23 16:36:41 +0000 |
commit | 7ee6f5f0157eea08876c705dcf6e02309d6153da (patch) | |
tree | dd69a74576b02be018e9a0afd3e1083a6ae9d1b9 | |
parent | c3b47f6e122ae36ff297944a9d90ab9f1151ec17 (diff) | |
download | mongo-7ee6f5f0157eea08876c705dcf6e02309d6153da.tar.gz |
SERVER-47117 clear maxTimeMSForHedgedReads delay from the getMore on hedge reads
(cherry picked from commit d05acff26c4c5a7fb27e9d0c7e31e4a3167fa7fe)
-rw-r--r-- | buildscripts/resmokeconfig/suites/causally_consistent_hedged_reads_jscore_passthrough.yml | 2 | ||||
-rw-r--r-- | jstests/sharding/hedged_reads.js | 39 | ||||
-rw-r--r-- | src/mongo/db/command_generic_argument.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/cursor_manager.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/operation_context.cpp | 21 | ||||
-rw-r--r-- | src/mongo/db/operation_context.h | 14 | ||||
-rw-r--r-- | src/mongo/db/query/query_request.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/query/query_request.h | 1 | ||||
-rw-r--r-- | src/mongo/db/service_entry_point_common.cpp | 72 | ||||
-rw-r--r-- | src/mongo/executor/network_interface_tl.cpp | 31 |
10 files changed, 140 insertions, 51 deletions
diff --git a/buildscripts/resmokeconfig/suites/causally_consistent_hedged_reads_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/causally_consistent_hedged_reads_jscore_passthrough.yml index e0d35ae3acb..71fb2fb59ae 100644 --- a/buildscripts/resmokeconfig/suites/causally_consistent_hedged_reads_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/causally_consistent_hedged_reads_jscore_passthrough.yml @@ -137,7 +137,7 @@ executor: mongos_options: set_parameters: enableTestCommands: 1 - #TODO SERVER-47117 + #TODO SERVER-47543 maxTimeMSForHedgedReads: 10000 logComponentVerbosity: verbosity: 0 diff --git a/jstests/sharding/hedged_reads.js b/jstests/sharding/hedged_reads.js index f1d2e1a97ab..105fed6cb8c 100644 --- a/jstests/sharding/hedged_reads.js +++ b/jstests/sharding/hedged_reads.js @@ -74,10 +74,18 @@ const testDB = st.s.getDB(dbName); const kBlockCmdTimeMS = 5 * 60 * 1000; const kWaitKillOpTimeoutMS = 5 * 1000; +const numDocs = 10; assert.commandWorked(st.s.adminCommand({enableSharding: dbName})); st.ensurePrimaryShard(dbName, st.shard0.shardName); +let bulk = testDB[collName].initializeUnorderedBulkOp(); +for (let i = 0; i < numDocs; i++) { + bulk.insert({x: i}); +} +assert.commandWorked(bulk.execute()); +assert.commandWorked(st.s.adminCommand({flushRouterConfig: 1})); + jsTest.log( "Verify that maxTimeMS expiration of the additional request does not affect the command result"); // The hedged read will have the maxTimeMS set to 10ms, hence need to sleep longer than that. @@ -113,6 +121,7 @@ try { kWaitKillOpTimeoutMS); } finally { clearCommandDelay(sortedNodes[0]); + clearCommandDelay(sortedNodes[1]); } jsTest.log( @@ -142,8 +151,38 @@ try { "Timed out waiting for the operation run by the additional request to be killed", kWaitKillOpTimeoutMS); } finally { + clearCommandDelay(sortedNodes[0]); clearCommandDelay(sortedNodes[1]); } +jsTest.log("Verify that the getMore on hedge request do not inherit maxTimeMS"); +try { + assert.commandWorked(st.s.adminCommand({setParameter: 1, maxTimeMSForHedgedReads: 100})); + + // force to open hedge read cursor on sortedNodes[1] + setCommandDelay(sortedNodes[0], "find", 100, ns); + + // $where with sleep is used because blocking command via failCommand does not affect the opCtx + // deadlines as it blocks and unblocks the command before it starts execution. + const comment = "test_getmore_on_additional_request_" + ObjectId(); + let findRes = assert.commandWorked(testDB.runCommand({ + find: collName, + filter: {$where: "sleep(200); return true;", x: {$gte: 0}}, + $readPreference: {mode: "nearest"}, + batchSize: 0, + comment: comment + })); + + const cursorId = findRes.cursor.id; + assert.neq(0, cursorId); + + // confirm that getMore does not time out. + let getMoreRes = + assert.commandWorked(testDB.runCommand({getMore: cursorId, collection: collName})); + assert.eq(getMoreRes.cursor.nextBatch.length, numDocs); +} finally { + clearCommandDelay(sortedNodes[0]); +} + st.stop(); }()); diff --git a/src/mongo/db/command_generic_argument.cpp b/src/mongo/db/command_generic_argument.cpp index d03b4e64eae..85556c371a6 100644 --- a/src/mongo/db/command_generic_argument.cpp +++ b/src/mongo/db/command_generic_argument.cpp @@ -52,7 +52,7 @@ struct SpecialArgRecord { // If that changes, it should be added. When you add to this list, consider whether you // should also change the filterCommandRequestForPassthrough() function. // clang-format off -static constexpr std::array<SpecialArgRecord, 28> specials{{ +static constexpr std::array<SpecialArgRecord, 29> specials{{ // /-isGeneric // | /-stripFromRequest // | | /-stripFromReply @@ -83,7 +83,8 @@ static constexpr std::array<SpecialArgRecord, 28> specials{{ {"operationTime"_sd, 0, 0, 1}, {"lastCommittedOpTime"_sd, 0, 0, 1}, {"readOnly"_sd, 0, 0, 1}, - {"comment"_sd, 1, 0, 0}}}; + {"comment"_sd, 1, 0, 0}, + {"maxTimeMSOpOnly"_sd, 1, 0, 0}}}; // clang-format on template <bool SpecialArgRecord::*pmo> diff --git a/src/mongo/db/cursor_manager.cpp b/src/mongo/db/cursor_manager.cpp index eb7359d9f4d..c24abcdd8b5 100644 --- a/src/mongo/db/cursor_manager.cpp +++ b/src/mongo/db/cursor_manager.cpp @@ -367,6 +367,11 @@ ClientCursorPin CursorManager::registerCursor(OperationContext* opCtx, _opKeyMap.emplace(*opKey, cursorId); } + // Restores the maxTimeMS provided in the cursor generating command in the case it used + // maxTimeMSOpOnly. This way the pinned cursor will have the leftover time consistent with the + // maxTimeMS. + opCtx->restoreMaxTimeMS(); + return ClientCursorPin(opCtx, unownedCursor, this); } diff --git a/src/mongo/db/operation_context.cpp b/src/mongo/db/operation_context.cpp index 5401f36c20a..260de1bf20b 100644 --- a/src/mongo/db/operation_context.cpp +++ b/src/mongo/db/operation_context.cpp @@ -181,6 +181,27 @@ Microseconds OperationContext::getRemainingMaxTimeMicros() const { return _maxTime - getElapsedTime(); } +void OperationContext::restoreMaxTimeMS() { + if (!_storedMaxTime) { + return; + } + + auto maxTime = *_storedMaxTime; + _storedMaxTime = boost::none; + + if (maxTime <= Microseconds::zero()) { + maxTime = Microseconds::max(); + } + + if (maxTime == Microseconds::max()) { + _deadline = Date_t::max(); + } else { + auto clock = getServiceContext()->getFastClockSource(); + _deadline = clock->now() + clock->getPrecision() + maxTime - _elapsedTime.elapsed(); + } + _maxTime = maxTime; +} + namespace { // Helper function for checkForInterrupt fail point. Decides whether the operation currently diff --git a/src/mongo/db/operation_context.h b/src/mongo/db/operation_context.h index c1e224bfee7..ea565afd52c 100644 --- a/src/mongo/db/operation_context.h +++ b/src/mongo/db/operation_context.h @@ -423,6 +423,15 @@ public: return _exhaust; } + void storeMaxTimeMS(Microseconds maxTime) { + _storedMaxTime = maxTime; + } + + /** + * Restore deadline to match the value stored in _storedMaxTime. + */ + void restoreMaxTimeMS(); + private: StatusWith<stdx::cv_status> waitForConditionOrInterruptNoAssertUntil( stdx::condition_variable& cv, BasicLockableAdapter m, Date_t deadline) noexcept override; @@ -540,12 +549,15 @@ private: bool _isExecutingShutdown = false; // Max operation time requested by the user or by the cursor in the case of a getMore with no - // user-specified maxTime. This is tracked with microsecond granularity for the purpose of + // user-specified maxTimeMS. This is tracked with microsecond granularity for the purpose of // assigning unused execution time back to a cursor at the end of an operation, only. The // _deadline and the service context's fast clock are the only values consulted for determining // if the operation's timelimit has been exceeded. Microseconds _maxTime = Microseconds::max(); + // The value of the maxTimeMS requested by user in the case it was overwritten. + boost::optional<Microseconds> _storedMaxTime; + // Timer counting the elapsed time since the construction of this OperationContext. Timer _elapsedTime; diff --git a/src/mongo/db/query/query_request.cpp b/src/mongo/db/query/query_request.cpp index dd80516535c..f5ac96d3d43 100644 --- a/src/mongo/db/query/query_request.cpp +++ b/src/mongo/db/query/query_request.cpp @@ -110,6 +110,7 @@ const char QueryRequest::kInternalReadAtClusterTimeField[] = "$_internalReadAtCl const char QueryRequest::kRequestResumeTokenField[] = "$_requestResumeToken"; const char QueryRequest::kResumeAfterField[] = "$_resumeAfter"; const char QueryRequest::kUse44SortKeys[] = "_use44SortKeys"; +const char QueryRequest::kMaxTimeMSOpOnlyField[] = "maxTimeMSOpOnly"; // Field names for sorting options. const char QueryRequest::kNaturalSortField[] = "$natural"; diff --git a/src/mongo/db/query/query_request.h b/src/mongo/db/query/query_request.h index 3593489cec8..115e8de7b60 100644 --- a/src/mongo/db/query/query_request.h +++ b/src/mongo/db/query/query_request.h @@ -80,6 +80,7 @@ public: static const char kRequestResumeTokenField[]; static const char kResumeAfterField[]; static const char kUse44SortKeys[]; + static const char kMaxTimeMSOpOnlyField[]; static const char kNaturalSortField[]; diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp index f773374f44c..0301647444c 100644 --- a/src/mongo/db/service_entry_point_common.cpp +++ b/src/mongo/db/service_entry_point_common.cpp @@ -981,6 +981,7 @@ void execCommandDatabase(OperationContext* opCtx, std::unique_ptr<MaintenanceModeSetter> mmSetter; BSONElement cmdOptionMaxTimeMSField; + BSONElement maxTimeMSOpOnlyField; BSONElement allowImplicitCollectionCreationField; BSONElement helpField; @@ -989,6 +990,11 @@ void execCommandDatabase(OperationContext* opCtx, StringData fieldName = element.fieldNameStringData(); if (fieldName == QueryRequest::cmdOptionMaxTimeMS) { cmdOptionMaxTimeMSField = element; + } else if (fieldName == QueryRequest::kMaxTimeMSOpOnlyField) { + uassert(ErrorCodes::InvalidOptions, + "Can not specify maxTimeMSOpOnly for non internal clients", + isInternalClient); + maxTimeMSOpOnlyField = element; } else if (fieldName == "allowImplicitCollectionCreation") { allowImplicitCollectionCreationField = element; } else if (fieldName == CommandHelpers::kHelpFieldName) { @@ -1008,9 +1014,9 @@ void execCommandDatabase(OperationContext* opCtx, if (CommandHelpers::isHelpRequest(helpField)) { CurOp::get(opCtx)->ensureStarted(); - // We disable last-error for help requests due to SERVER-11492, because config servers - // use help requests to determine which commands are database writes, and so must be - // forwarded to all config servers. + // We disable last-error for help requests due to SERVER-11492, because config + // servers use help requests to determine which commands are database writes, and so + // must be forwarded to all config servers. LastError::get(opCtx->getClient()).disable(); Command::generateHelpResponse(opCtx, replyBuilder, *command); return; @@ -1076,20 +1082,28 @@ void execCommandDatabase(OperationContext* opCtx, opCounters->gotCommand(); } - // Parse the 'maxTimeMS' command option, and use it to set a deadline for the operation on - // the OperationContext. The 'maxTimeMS' option unfortunately has a different meaning for a - // getMore command, where it is used to communicate the maximum time to wait for new inserts - // on tailable cursors, not as a deadline for the operation. + // Parse the 'maxTimeMS' command option, and use it to set a deadline for the operation + // on the OperationContext. The 'maxTimeMS' option unfortunately has a different meaning + // for a getMore command, where it is used to communicate the maximum time to wait for + // new inserts on tailable cursors, not as a deadline for the operation. // TODO SERVER-34277 Remove the special handling for maxTimeMS for getMores. This will - // require introducing a new 'max await time' parameter for getMore, and eventually banning - // maxTimeMS altogether on a getMore command. - const int maxTimeMS = - uassertStatusOK(QueryRequest::parseMaxTimeMS(cmdOptionMaxTimeMSField)); - if (maxTimeMS > 0 && command->getLogicalOp() != LogicalOp::opGetMore) { + // require introducing a new 'max await time' parameter for getMore, and eventually + // banning maxTimeMS altogether on a getMore command. + int maxTimeMS = uassertStatusOK(QueryRequest::parseMaxTimeMS(cmdOptionMaxTimeMSField)); + int maxTimeMSOpOnly = uassertStatusOK(QueryRequest::parseMaxTimeMS(maxTimeMSOpOnlyField)); + + if ((maxTimeMS > 0 || maxTimeMSOpOnly > 0) && + command->getLogicalOp() != LogicalOp::opGetMore) { uassert(40119, "Illegal attempt to set operation deadline within DBDirectClient", !opCtx->getClient()->isInDirectClient()); - opCtx->setDeadlineAfterNowBy(Milliseconds{maxTimeMS}, ErrorCodes::MaxTimeMSExpired); + if (maxTimeMSOpOnly > 0 && (maxTimeMS == 0 || maxTimeMSOpOnly < maxTimeMS)) { + opCtx->storeMaxTimeMS(Milliseconds{maxTimeMS}); + opCtx->setDeadlineAfterNowBy(Milliseconds{maxTimeMSOpOnly}, + ErrorCodes::MaxTimeMSExpired); + } else if (maxTimeMS > 0) { + opCtx->setDeadlineAfterNowBy(Milliseconds{maxTimeMS}, ErrorCodes::MaxTimeMSExpired); + } } auto& readConcernArgs = repl::ReadConcernArgs::get(opCtx); @@ -1130,8 +1144,8 @@ void execCommandDatabase(OperationContext* opCtx, opCtx->lockState()->setShouldConflictWithSecondaryBatchApplication(false); } - // Remember whether or not this operation is starting a transaction, in case something later - // in the execution needs to adjust its behavior based on this. + // Remember whether or not this operation is starting a transaction, in case something + // later in the execution needs to adjust its behavior based on this. opCtx->setIsStartingMultiDocumentTransaction(startTransaction); auto& oss = OperationShardingState::get(opCtx); @@ -1156,9 +1170,10 @@ void execCommandDatabase(OperationContext* opCtx, // This may trigger the maxTimeAlwaysTimeOut failpoint. auto status = opCtx->checkForInterruptNoAssert(); - // We still proceed if the primary stepped down, but accept other kinds of interruptions. - // We defer to individual commands to allow themselves to be interruptible by stepdowns, - // since commands like 'voteRequest' should conversely continue executing. + // We still proceed if the primary stepped down, but accept other kinds of + // interruptions. We defer to individual commands to allow themselves to be + // interruptible by stepdowns, since commands like 'voteRequest' should conversely + // continue executing. if (status != ErrorCodes::PrimarySteppedDown && status != ErrorCodes::InterruptedDueToReplStateChange) { uassertStatusOK(status); @@ -1208,10 +1223,10 @@ void execCommandDatabase(OperationContext* opCtx, auto engine = opCtx->getServiceContext()->getStorageEngine(); invariant(engine && engine->supportsReadConcernSnapshot()); - // SnapshotTooOld errors indicate that PIT ops are failing to find an available snapshot - // at their specified atClusterTime. Therefore, we'll try to increase the snapshot - // history window that the storage engine maintains in order to increase the likelihood - // of successful future PIT atClusterTime requests. + // SnapshotTooOld errors indicate that PIT ops are failing to find an available + // snapshot at their specified atClusterTime. Therefore, we'll try to increase the + // snapshot history window that the storage engine maintains in order to increase + // the likelihood of successful future PIT atClusterTime requests. SnapshotWindowUtil::incrementSnapshotTooOldErrorCount(); SnapshotWindowUtil::increaseTargetSnapshotWindowSize(opCtx); } else { @@ -1349,7 +1364,8 @@ DbResponse receivedCommands(OperationContext* opCtx, ServiceEntryPointCommon::getRedactedCopyForLogging(c, request.body))); { - // Try to set this as early as possible, as soon as we have figured out the command. + // Try to set this as early as possible, as soon as we have figured out the + // command. stdx::lock_guard<Client> lk(*opCtx->getClient()); CurOp::get(opCtx)->setLogicalOp_inlock(c->getLogicalOp()); } @@ -1588,12 +1604,12 @@ DbResponse receivedGetMore(OperationContext* opCtx, // Make sure that killCursorGlobal does not throw an exception if it is interrupted. UninterruptibleLockGuard noInterrupt(opCtx->lockState()); - // If an error was thrown prior to auth checks, then the cursor should remain alive in - // order to prevent an unauthorized user from resulting in the death of a cursor. In - // other error cases, the cursor is dead and should be cleaned up. + // If an error was thrown prior to auth checks, then the cursor should remain alive + // in order to prevent an unauthorized user from resulting in the death of a cursor. + // In other error cases, the cursor is dead and should be cleaned up. // - // If killing the cursor fails, ignore the error and don't try again. The cursor should - // be reaped by the client cursor timeout thread. + // If killing the cursor fails, ignore the error and don't try again. The cursor + // should be reaped by the client cursor timeout thread. CursorManager::get(opCtx) ->killCursor(opCtx, cursorid, false /* shouldAudit */) .ignore(); diff --git a/src/mongo/executor/network_interface_tl.cpp b/src/mongo/executor/network_interface_tl.cpp index ea058794a53..b00c1024e74 100644 --- a/src/mongo/executor/network_interface_tl.cpp +++ b/src/mongo/executor/network_interface_tl.cpp @@ -47,7 +47,7 @@ namespace mongo { namespace executor { namespace { -static inline const std::string kMaxTimeMSOptionName = "maxTimeMS"; +static inline const std::string kMaxTimeMSOpOnlyField = "maxTimeMSOpOnly"; } // unnamed namespace /** @@ -704,26 +704,19 @@ void NetworkInterfaceTL::RequestManager::trySend( if (requestState->isHedge) { invariant(cmdStatePtr->requestOnAny.hedgeOptions); - - // Attach a maxTimeMS to the request. auto maxTimeMS = request.hedgeOptions->maxTimeMSForHedgedReads; - if (request.timeout == request.kNoTimeout || request.timeout > Milliseconds(maxTimeMS)) { - BSONObjBuilder updatedCmdBuilder; - for (const auto& elem : request.cmdObj) { - if (elem.fieldNameStringData() != kMaxTimeMSOptionName) { - updatedCmdBuilder.append(elem); - } - } - updatedCmdBuilder.append(kMaxTimeMSOptionName, maxTimeMS); - request.cmdObj = updatedCmdBuilder.obj(); - LOGV2_DEBUG(4647200, - 2, - "Set maxTimeMS for request", - "maxTimeMS"_attr = maxTimeMS, - "request_id"_attr = cmdStatePtr->requestOnAny.id, - "target"_attr = cmdStatePtr->requestOnAny.target[idx]); - } + BSONObjBuilder updatedCmdBuilder; + updatedCmdBuilder.appendElements(request.cmdObj); + updatedCmdBuilder.append(kMaxTimeMSOpOnlyField, maxTimeMS); + request.cmdObj = updatedCmdBuilder.obj(); + + LOGV2_DEBUG(4647200, + 2, + "Setup hedge request", + "request_id"_attr = cmdStatePtr->requestOnAny.id, + "request"_attr = redact(request.toString()), + "target"_attr = cmdStatePtr->requestOnAny.target[idx]); if (cmdStatePtr->interface->_svcCtx) { auto hm = HedgingMetrics::get(cmdStatePtr->interface->_svcCtx); |