summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharlie Swanson <charlie.swanson@mongodb.com>2018-03-30 15:54:57 -0400
committerCharlie Swanson <charlie.swanson@mongodb.com>2018-04-05 10:08:08 -0400
commit83e1a7097433b98109bbdf67f4ae3eb2421c926f (patch)
tree0019bc517295a9712549f9db7f4b1eca8b5f17c3
parent8d4c7404718bd32de68f3cce500cc160f9f7ecef (diff)
downloadmongo-83e1a7097433b98109bbdf67f4ae3eb2421c926f.tar.gz
SERVER-33942 Avoid setting deadline for maxTimeMS on tailable cursors
-rw-r--r--jstests/noPassthrough/tailable_getmore_to_mongos_does_not_timeout.js29
-rw-r--r--jstests/noPassthroughWithMongod/tailable_getmore_does_not_timeout.js57
-rw-r--r--src/mongo/db/commands/getmore_cmd.cpp1
-rw-r--r--src/mongo/db/operation_context.h8
-rw-r--r--src/mongo/db/service_entry_point_common.cpp22
-rw-r--r--src/mongo/s/commands/cluster_getmore_cmd.cpp4
-rw-r--r--src/mongo/s/commands/strategy.cpp30
-rw-r--r--src/mongo/s/query/cluster_find.cpp9
8 files changed, 123 insertions, 37 deletions
diff --git a/jstests/noPassthrough/tailable_getmore_to_mongos_does_not_timeout.js b/jstests/noPassthrough/tailable_getmore_to_mongos_does_not_timeout.js
new file mode 100644
index 00000000000..7c37d73ebdd
--- /dev/null
+++ b/jstests/noPassthrough/tailable_getmore_to_mongos_does_not_timeout.js
@@ -0,0 +1,29 @@
+// Tests that specifying a maxTimeMS on a getMore request to mongos is not interpreted as a deadline
+// for the operationfor a tailable + awaitData cursor.
+// This test was designed to reproduce SERVER-33942 against a mongos.
+// @tags: [requires_sharding]
+(function() {
+ "use strict";
+
+ const st = new ShardingTest({shards: 2});
+
+ const db = st.s.getDB("test");
+ const coll = db.capped;
+ assert.commandWorked(db.runCommand({create: "capped", capped: true, size: 1024}));
+ assert.writeOK(coll.insert({}));
+ const findResult = assert.commandWorked(
+ db.runCommand({find: "capped", filter: {}, tailable: true, awaitData: true}));
+
+ const cursorId = findResult.cursor.id;
+ assert.neq(cursorId, 0);
+
+ // Test that the getMores on this tailable cursor are immune to interrupt.
+ assert.commandWorked(
+ db.adminCommand({configureFailPoint: "maxTimeAlwaysTimeOut", mode: "alwaysOn"}));
+ assert.commandWorked(db.runCommand({getMore: cursorId, collection: "capped", maxTimeMS: 30}));
+ assert.commandWorked(db.runCommand({getMore: cursorId, collection: "capped"}));
+ assert.commandWorked(
+ db.adminCommand({configureFailPoint: "maxTimeAlwaysTimeOut", mode: "off"}));
+
+ st.stop();
+}());
diff --git a/jstests/noPassthroughWithMongod/tailable_getmore_does_not_timeout.js b/jstests/noPassthroughWithMongod/tailable_getmore_does_not_timeout.js
new file mode 100644
index 00000000000..0e8d05db09b
--- /dev/null
+++ b/jstests/noPassthroughWithMongod/tailable_getmore_does_not_timeout.js
@@ -0,0 +1,57 @@
+// Tests that specifying a maxTimeMS on a getMore for a tailable + awaitData cursor is not
+// interpreted as a deadline for the operation.
+// This test was designed to reproduce SERVER-33942 against a mongod.
+(function() {
+ "use strict";
+
+ const coll = db.tailable_getmore_no_timeout;
+ coll.drop();
+
+ assert.commandWorked(db.runCommand({create: coll.getName(), capped: true, size: 1024}));
+
+ for (let i = 0; i < 10; ++i) {
+ assert.writeOK(coll.insert({_id: i}));
+ }
+
+ const findResponse = assert.commandWorked(
+ db.runCommand({find: coll.getName(), filter: {}, tailable: true, awaitData: true}));
+ const cursorId = findResponse.cursor.id;
+ assert.neq(0, cursorId);
+
+ // Start an operation in a parallel shell that holds the lock for a while.
+ const awaitSleepShell = startParallelShell(
+ () => assert.commandFailedWithCode(db.adminCommand({sleep: 1, lock: "w", secs: 600}),
+ ErrorCodes.Interrupted));
+
+ // Start a getMore and verify that it is waiting for the lock.
+ const getMoreMaxTimeMS = 10;
+ const awaitGetMoreShell = startParallelShell(`
+ // Wait for the sleep command to take the lock.
+ assert.soon(() => db.getSiblingDB("admin")
+ .currentOp({"command.sleep": 1, active: true})
+ .inprog.length === 1);
+ // Start the getMore with a low maxTimeMS.
+ assert.commandWorked(db.runCommand({
+ getMore: ${cursorId.toString()},
+ collection: "${coll.getName()}",
+ maxTimeMS: ${getMoreMaxTimeMS}
+ }));
+ `);
+
+ // Wait to see the getMore waiting on the lock.
+ assert.soon(
+ () =>
+ db.currentOp({"command.getMore": cursorId, waitingForLock: true}).inprog.length === 1);
+
+ // Sleep for twice the maxTimeMS to prove that the getMore won't time out waiting for the lock.
+ sleep(2 * getMoreMaxTimeMS);
+
+ // Then kill the command with the lock, allowing the getMore to continue successfully.
+ const sleepOps = db.getSiblingDB("admin").currentOp({"command.sleep": 1, active: true}).inprog;
+ assert.eq(sleepOps.length, 1);
+ const sleepOpId = sleepOps[0].opid;
+ assert.commandWorked(db.adminCommand({killOp: 1, op: sleepOpId}));
+
+ awaitSleepShell();
+ awaitGetMoreShell();
+}());
diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp
index 12102bf0ed4..e62e3667afd 100644
--- a/src/mongo/db/commands/getmore_cmd.cpp
+++ b/src/mongo/db/commands/getmore_cmd.cpp
@@ -384,7 +384,6 @@ public:
// any leftover time from the maxTimeMS of the operation that spawned this cursor,
// applying it to this getMore.
if (cursor->isAwaitData() && !disableAwaitDataFailpointActive) {
- opCtx->clearDeadline();
awaitDataState(opCtx).waitForInsertsDeadline =
opCtx->getServiceContext()->getPreciseClockSource()->now() +
request.awaitDataTimeout.value_or(Seconds{1});
diff --git a/src/mongo/db/operation_context.h b/src/mongo/db/operation_context.h
index 7c7862c6629..1e53a5648e5 100644
--- a/src/mongo/db/operation_context.h
+++ b/src/mongo/db/operation_context.h
@@ -406,14 +406,6 @@ public:
}
/**
- * Reset the deadline for this operation.
- */
- void clearDeadline() {
- _deadline = Date_t::max();
- _maxTime = computeMaxTimeFromDeadline(_deadline);
- }
-
- /**
* Returns the number of milliseconds remaining for this operation's time limit or
* Milliseconds::max() if the operation has no time limit.
*/
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index d1b46dccb16..f460ffa9cd3 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -573,7 +573,6 @@ void execCommandDatabase(OperationContext* opCtx,
BSONElement cmdOptionMaxTimeMSField;
BSONElement allowImplicitCollectionCreationField;
BSONElement helpField;
- BSONElement queryOptionMaxTimeMSField;
StringMap<int> topLevelFields;
for (auto&& element : request.body) {
@@ -585,7 +584,8 @@ void execCommandDatabase(OperationContext* opCtx,
} else if (fieldName == CommandHelpers::kHelpFieldName) {
helpField = element;
} else if (fieldName == QueryRequest::queryOptionMaxTimeMS) {
- queryOptionMaxTimeMSField = element;
+ uasserted(ErrorCodes::InvalidOptions,
+ "no such command option $maxTimeMs; use maxTimeMS instead");
}
uassert(ErrorCodes::FailedToParse,
@@ -658,14 +658,16 @@ void execCommandDatabase(OperationContext* opCtx,
opCounters->gotCommand();
}
- // Handle command option maxTimeMS.
- int maxTimeMS = uassertStatusOK(QueryRequest::parseMaxTimeMS(cmdOptionMaxTimeMSField));
-
- uassert(ErrorCodes::InvalidOptions,
- "no such command option $maxTimeMs; use maxTimeMS instead",
- queryOptionMaxTimeMSField.eoo());
-
- if (maxTimeMS > 0) {
+ // Parse the 'maxTimeMS' command option, and use it to set a deadline for the operation on
+ // the OperationContext. The 'maxTimeMS' option unfortunately has a different meaning for a
+ // getMore command, where it is used to communicate the maximum time to wait for new inserts
+ // on tailable cursors, not as a deadline for the operation.
+ // TODO SERVER-34277 Remove the special handling for maxTimeMS for getMores. This will
+ // require introducing a new 'max await time' parameter for getMore, and eventually banning
+ // maxTimeMS altogether on a getMore command.
+ const int maxTimeMS =
+ uassertStatusOK(QueryRequest::parseMaxTimeMS(cmdOptionMaxTimeMSField));
+ if (maxTimeMS > 0 && command->getLogicalOp() != LogicalOp::opGetMore) {
uassert(40119,
"Illegal attempt to set operation deadline within DBDirectClient",
!opCtx->getClient()->isInDirectClient());
diff --git a/src/mongo/s/commands/cluster_getmore_cmd.cpp b/src/mongo/s/commands/cluster_getmore_cmd.cpp
index 70e8bdc7048..5455eb0f1ec 100644
--- a/src/mongo/s/commands/cluster_getmore_cmd.cpp
+++ b/src/mongo/s/commands/cluster_getmore_cmd.cpp
@@ -81,6 +81,10 @@ public:
return "retrieve more documents for a cursor id";
}
+ LogicalOp getLogicalOp() const final {
+ return LogicalOp::opGetMore;
+ }
+
Status checkAuthForCommand(Client* client,
const std::string& dbname,
const BSONObj& cmdObj) const final {
diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp
index baafa3bbce6..fa70221c4fd 100644
--- a/src/mongo/s/commands/strategy.cpp
+++ b/src/mongo/s/commands/strategy.cpp
@@ -280,18 +280,6 @@ void runCommand(OperationContext* opCtx,
const OpMsgRequest& request,
const NetworkOp opType,
BSONObjBuilder&& builder) {
- // Handle command option maxTimeMS first thing while processing the command so that the
- // subsequent code has the deadline available
- uassert(ErrorCodes::InvalidOptions,
- "no such command option $maxTimeMs; use maxTimeMS instead",
- request.body[QueryRequest::queryOptionMaxTimeMS].eoo());
-
- const int maxTimeMS = uassertStatusOK(
- QueryRequest::parseMaxTimeMS(request.body[QueryRequest::cmdOptionMaxTimeMS]));
- if (maxTimeMS > 0) {
- opCtx->setDeadlineAfterNowBy(Milliseconds{maxTimeMS});
- }
-
auto const commandName = request.getCommandName();
auto const command = CommandHelpers::findCommand(commandName);
if (!command) {
@@ -303,6 +291,24 @@ void runCommand(OperationContext* opCtx,
return;
}
+ // Parse the 'maxTimeMS' command option, and use it to set a deadline for the operation on
+ // the OperationContext. Be sure to do this as soon as possible so that further processing by
+ // subsequent code has the deadline available. The 'maxTimeMS' option unfortunately has a
+ // different meaning for a getMore command, where it is used to communicate the maximum time to
+ // wait for new inserts on tailable cursors, not as a deadline for the operation.
+ // TODO SERVER-34277 Remove the special handling for maxTimeMS for getMores. This will
+ // require introducing a new 'max await time' parameter for getMore, and eventually banning
+ // maxTimeMS altogether on a getMore command.
+ uassert(ErrorCodes::InvalidOptions,
+ "no such command option $maxTimeMs; use maxTimeMS instead",
+ request.body[QueryRequest::queryOptionMaxTimeMS].eoo());
+ const int maxTimeMS = uassertStatusOK(
+ QueryRequest::parseMaxTimeMS(request.body[QueryRequest::cmdOptionMaxTimeMS]));
+ if (maxTimeMS > 0 && command->getLogicalOp() != LogicalOp::opGetMore) {
+ opCtx->setDeadlineAfterNowBy(Milliseconds{maxTimeMS});
+ }
+ opCtx->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.
+
// Set the logical optype, command object and namespace as soon as we identify the command. If
// the command does not define a fully-qualified namespace, set CurOp to the generic command
// namespace db.$cmd.
diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp
index 52726fba207..8c9e654f5bb 100644
--- a/src/mongo/s/query/cluster_find.cpp
+++ b/src/mongo/s/query/cluster_find.cpp
@@ -384,12 +384,9 @@ Status setUpOperationContextStateForGetMore(OperationContext* opCtx,
}
if (cursor->isTailableAndAwaitData()) {
- // A maxTimeMS specified on a tailable, awaitData cursor is special. Instead of imposing a
- // deadline on the operation, it is used to communicate how long the server should wait for
- // new results. Here we clear any deadline set during command processing and track the
- // deadline instead via the 'waitForInsertsDeadline' decoration. This deadline defaults to
- // 1 second if the user didn't specify a maxTimeMS.
- opCtx->clearDeadline();
+ // For tailable + awaitData cursors, the request may have indicated a maximum amount of time
+ // to wait for new data. If not, default it to 1 second. We track the deadline instead via
+ // the 'waitForInsertsDeadline' decoration.
auto timeout = request.awaitDataTimeout.value_or(Milliseconds{1000});
awaitDataState(opCtx).waitForInsertsDeadline =
opCtx->getServiceContext()->getPreciseClockSource()->now() + timeout;