summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMax Hirschhorn <max.hirschhorn@mongodb.com>2018-04-27 11:20:51 -0400
committerMax Hirschhorn <max.hirschhorn@mongodb.com>2018-04-27 11:20:51 -0400
commitc7d12379bcd047c923b72bd29ac99d05edfbb82a (patch)
tree6889312ed290fc923f974aaea3cc82d8371ec10b
parent69ba26c48be6a1ae334e6221765941a413e37bf5 (diff)
downloadmongo-c7d12379bcd047c923b72bd29ac99d05edfbb82a.tar.gz
SERVER-34665 Update definition of retryable error in the mongo shell.
Also exposes a way to explicitly trigger retargeting before the next operation is run on the DBClientRS underlying a replica set connection by calling ReplicaSetMonitor::failedHost().
-rw-r--r--jstests/libs/retryable_writes_util.js3
-rw-r--r--jstests/noPassthrough/shell_retry_writes_on_retryable_errors.js125
-rw-r--r--src/mongo/scripting/mozjs/mongo.cpp49
-rw-r--r--src/mongo/scripting/mozjs/mongo.h3
-rw-r--r--src/mongo/shell/session.js142
5 files changed, 250 insertions, 72 deletions
diff --git a/jstests/libs/retryable_writes_util.js b/jstests/libs/retryable_writes_util.js
index 6c5c244045f..795fc33c254 100644
--- a/jstests/libs/retryable_writes_util.js
+++ b/jstests/libs/retryable_writes_util.js
@@ -4,6 +4,9 @@
var RetryableWritesUtil = (function() {
/**
* Returns true if the error code is retryable, assuming the command is idempotent.
+ *
+ * TODO SERVER-34666: Expose the isRetryableCode() function that's defined in
+ * src/mongo/shell/session.js and use it here.
*/
function isRetryableCode(code) {
return ErrorCodes.isNetworkError(code) || ErrorCodes.isNotMasterError(code) ||
diff --git a/jstests/noPassthrough/shell_retry_writes_on_retryable_errors.js b/jstests/noPassthrough/shell_retry_writes_on_retryable_errors.js
new file mode 100644
index 00000000000..876ee8326d5
--- /dev/null
+++ b/jstests/noPassthrough/shell_retry_writes_on_retryable_errors.js
@@ -0,0 +1,125 @@
+/**
+ * Tests that the mongo shell retries exactly once on retryable errors.
+ *
+ * @tags: [requires_replication]
+ */
+(function() {
+ "use strict";
+
+ const rst = new ReplSetTest({nodes: 2});
+ rst.startSet();
+ rst.initiate();
+
+ const dbName = "test";
+ const collName = jsTest.name();
+
+ const rsConn = new Mongo(rst.getURL());
+ const db = rsConn.startSession({retryWrites: true}).getDatabase(dbName);
+
+ // We configure the mongo shell to log its retry attempts so there are more diagnostics
+ // available in case this test ever fails.
+ TestData.logRetryAttempts = true;
+
+ /**
+ * The testCommandIsRetried() function serves as the fixture for writing test cases which run
+ * commands against the server and assert that the mongo shell retries them correctly.
+ *
+ * The 'testFn' parameter is a function that performs an arbitrary number of operations against
+ * the database. The command requests that the mongo shell attempts to send to the server
+ * (including any command requests which are retried) are then specified as the sole argument to
+ * the 'assertFn' parameter.
+ *
+ * The testFn(enableCapture, disableCapture) function can also selectively turn on and off the
+ * capturing of command requests by calling the functions it receives for its first and second
+ * parameters, respectively.
+ */
+ function testCommandIsRetried(testFn, assertFn) {
+ const mongoRunCommandOriginal = Mongo.prototype.runCommand;
+ const cmdObjsSeen = [];
+
+ let shouldCaptureCmdObjs = true;
+
+ Mongo.prototype.runCommand = function runCommandSpy(dbName, cmdObj, options) {
+ if (shouldCaptureCmdObjs) {
+ cmdObjsSeen.push(cmdObj);
+ }
+
+ return mongoRunCommandOriginal.apply(this, arguments);
+ };
+
+ try {
+ assert.doesNotThrow(() => testFn(
+ () => {
+ shouldCaptureCmdObjs = true;
+ },
+ () => {
+ shouldCaptureCmdObjs = false;
+ }));
+ } finally {
+ Mongo.prototype.runCommand = mongoRunCommandOriginal;
+ }
+
+ if (cmdObjsSeen.length === 0) {
+ throw new Error("Mongo.prototype.runCommand() was never called: " + testFn.toString());
+ }
+
+ assertFn(cmdObjsSeen);
+ }
+
+ testCommandIsRetried(
+ function testInsertRetriedOnWriteConcernError(enableCapture, disableCapture) {
+ disableCapture();
+ const secondary = rst.getSecondary();
+ secondary.adminCommand({configureFailPoint: "rsSyncApplyStop", mode: "alwaysOn"});
+
+ try {
+ enableCapture();
+ const res = db[collName].insert({}, {writeConcern: {w: 2, wtimeout: 1000}});
+ assert.commandFailedWithCode(res, ErrorCodes.WriteConcernFailed);
+ disableCapture();
+ } finally {
+ // We disable the failpoint in a finally block to prevent a misleading fassert()
+ // message from being logged by the secondary when it is shut down with the
+ // failpoint enabled.
+ secondary.adminCommand({configureFailPoint: "rsSyncApplyStop", mode: "off"});
+ }
+ },
+ function assertInsertRetriedExactlyOnce(cmdObjsSeen) {
+ assert.eq(2, cmdObjsSeen.length, () => tojson(cmdObjsSeen));
+ assert(cmdObjsSeen.every(cmdObj => Object.keys(cmdObj)[0] === "insert"),
+ () => "expected both attempts to be insert requests: " + tojson(cmdObjsSeen));
+ assert.eq(
+ cmdObjsSeen[0], cmdObjsSeen[1], "command request changed between retry attempts");
+ });
+
+ testCommandIsRetried(
+ function testUpdateRetriedOnRetryableCommandError(enableCapture, disableCapture) {
+ disableCapture();
+
+ const primary = rst.getPrimary();
+ primary.adminCommand({
+ configureFailPoint: "onPrimaryTransactionalWrite",
+ data: {
+ closeConnection: false,
+ failBeforeCommitExceptionCode: ErrorCodes.InterruptedDueToReplStateChange
+ },
+ mode: {times: 1}
+ });
+
+ enableCapture();
+ const res = db[collName].update({}, {$set: {a: 1}});
+ assert.commandWorked(res);
+ disableCapture();
+
+ primary.adminCommand({configureFailPoint: "onPrimaryTransactionalWrite", mode: "off"});
+ },
+ function assertUpdateRetriedExactlyOnce(cmdObjsSeen) {
+ assert.eq(2, cmdObjsSeen.length, () => tojson(cmdObjsSeen));
+ assert(cmdObjsSeen.every(cmdObj => Object.keys(cmdObj)[0] === "update"),
+ () => "expected both attempts to be update requests: " + tojson(cmdObjsSeen));
+ assert.eq(
+ cmdObjsSeen[0], cmdObjsSeen[1], "command request changed between retry attempts");
+ });
+
+ rst.stopSet();
+})();
diff --git a/src/mongo/scripting/mozjs/mongo.cpp b/src/mongo/scripting/mozjs/mongo.cpp
index 249792266a7..ba86facd270 100644
--- a/src/mongo/scripting/mozjs/mongo.cpp
+++ b/src/mongo/scripting/mozjs/mongo.cpp
@@ -31,10 +31,12 @@
#include "mongo/scripting/mozjs/mongo.h"
#include "mongo/bson/simple_bsonelement_comparator.h"
+#include "mongo/client/dbclient_rs.h"
#include "mongo/client/dbclientinterface.h"
#include "mongo/client/global_conn_pool.h"
#include "mongo/client/mongo_uri.h"
#include "mongo/client/native_sasl_client_session.h"
+#include "mongo/client/replica_set_monitor.h"
#include "mongo/client/sasl_client_authenticate.h"
#include "mongo/client/sasl_client_session.h"
#include "mongo/db/auth/sasl_command_constants.h"
@@ -73,6 +75,7 @@ const JSFunctionSpec MongoBase::methods[] = {
MONGO_ATTACH_JS_CONSTRAINED_METHOD_NO_PROTO(insert, MongoLocalInfo, MongoExternalInfo),
MONGO_ATTACH_JS_CONSTRAINED_METHOD_NO_PROTO(
isReplicaSetConnection, MongoLocalInfo, MongoExternalInfo),
+ MONGO_ATTACH_JS_CONSTRAINED_METHOD_NO_PROTO(_markNodeAsFailed, MongoExternalInfo),
MONGO_ATTACH_JS_CONSTRAINED_METHOD_NO_PROTO(logout, MongoLocalInfo, MongoExternalInfo),
MONGO_ATTACH_JS_CONSTRAINED_METHOD_NO_PROTO(remove, MongoLocalInfo, MongoExternalInfo),
MONGO_ATTACH_JS_CONSTRAINED_METHOD_NO_PROTO(runCommand, MongoLocalInfo, MongoExternalInfo),
@@ -159,18 +162,23 @@ void setHiddenMongo(JSContext* cx,
} else {
scope->getProto<MongoExternalInfo>().newObject(&newMongo);
}
+
+ auto host = resPtr->getServerAddress();
JS_SetPrivate(newMongo,
scope->trackedNew<std::shared_ptr<DBClientBase>>(std::move(resPtr)));
ObjectWrapper from(cx, args.thisv());
ObjectWrapper to(cx, newMongo);
- for (const auto& k :
- {InternedString::slaveOk, InternedString::defaultDB, InternedString::host}) {
+ for (const auto& k : {InternedString::slaveOk, InternedString::defaultDB}) {
JS::RootedValue tmpValue(cx);
from.getValue(k, &tmpValue);
to.setValue(k, tmpValue);
}
+ // 'newMongo' is a direct connection to an individual server. Its "host" property therefore
+ // reports the stringified HostAndPort of the underlying DBClientConnection.
+ to.setString(InternedString::host, host);
+
JS::RootedValue value(cx);
value.setObjectOrNull(newMongo);
@@ -667,6 +675,43 @@ void MongoBase::Functions::isReplicaSetConnection::call(JSContext* cx, JS::CallA
args.rval().setBoolean(conn->type() == ConnectionString::ConnectionType::SET);
}
+void MongoBase::Functions::_markNodeAsFailed::call(JSContext* cx, JS::CallArgs args) {
+ if (args.length() != 3) {
+ uasserted(ErrorCodes::BadValue, "_markNodeAsFailed needs 3 args");
+ }
+
+ if (!args.get(0).isString()) {
+ uasserted(ErrorCodes::BadValue,
+ "first argument to _markNodeAsFailed must be a stringified host and port");
+ }
+
+ if (!args.get(1).isNumber()) {
+ uasserted(ErrorCodes::BadValue,
+ "second argument to _markNodeAsFailed must be a numeric error code");
+ }
+
+ if (!args.get(2).isString()) {
+ uasserted(ErrorCodes::BadValue,
+ "third argument to _markNodeAsFailed must be a stringified reason");
+ }
+
+ auto* rsConn = dynamic_cast<DBClientReplicaSet*>(getConnection(args));
+ if (!rsConn) {
+ uasserted(ErrorCodes::BadValue, "connection object isn't a replica set connection");
+ }
+
+ auto hostAndPort = ValueWriter(cx, args.get(0)).toString();
+ auto code = ValueWriter(cx, args.get(1)).toInt32();
+ auto reason = ValueWriter(cx, args.get(2)).toString();
+
+ const auto& replicaSetName = rsConn->getSetName();
+ ReplicaSetMonitor::get(replicaSetName)
+ ->failedHost(HostAndPort(hostAndPort),
+ Status{static_cast<ErrorCodes::Error>(code), reason});
+
+ args.rval().setUndefined();
+}
+
void MongoLocalInfo::construct(JSContext* cx, JS::CallArgs args) {
auto scope = getScope(cx);
diff --git a/src/mongo/scripting/mozjs/mongo.h b/src/mongo/scripting/mozjs/mongo.h
index 8c67677ddea..36fbc46a172 100644
--- a/src/mongo/scripting/mozjs/mongo.h
+++ b/src/mongo/scripting/mozjs/mongo.h
@@ -54,6 +54,7 @@ struct MongoBase : public BaseInfo {
MONGO_DECLARE_JS_FUNCTION(getServerRPCProtocols);
MONGO_DECLARE_JS_FUNCTION(insert);
MONGO_DECLARE_JS_FUNCTION(isReplicaSetConnection);
+ MONGO_DECLARE_JS_FUNCTION(_markNodeAsFailed);
MONGO_DECLARE_JS_FUNCTION(logout);
MONGO_DECLARE_JS_FUNCTION(remove);
MONGO_DECLARE_JS_FUNCTION(runCommand);
@@ -67,7 +68,7 @@ struct MongoBase : public BaseInfo {
MONGO_DECLARE_JS_FUNCTION(_startSession);
};
- static const JSFunctionSpec methods[22];
+ static const JSFunctionSpec methods[23];
static const char* const className;
static const unsigned classFlags = JSCLASS_HAS_PRIVATE;
diff --git a/src/mongo/shell/session.js b/src/mongo/shell/session.js
index c1bc617c4d1..08bce40b94d 100644
--- a/src/mongo/shell/session.js
+++ b/src/mongo/shell/session.js
@@ -297,43 +297,17 @@ var {
/**
* Returns true if the error code is retryable, assuming the command is idempotent.
+ *
+ * The Retryable Writes specification defines a RetryableError as any network error, any of
+ * the following error codes, or an error response with a different code containing the
+ * phrase "not master" or "node is recovering".
+ *
+ * https://github.com/mongodb/specifications/blob/5b53e0baca18ba111364d479a37fa9195ef801a6/
+ * source/retryable-writes/retryable-writes.rst#terms
*/
function isRetryableCode(code) {
return ErrorCodes.isNetworkError(code) || ErrorCodes.isNotMasterError(code) ||
- // The driver's spec does not allow retrying on writeConcern errors, so only do so
- // when testing retryable writes.
- (jsTest.options().alwaysInjectTransactionNumber &&
- ErrorCodes.isWriteConcernError(code));
- }
-
- /**
- * Returns the error code from a write response that should be used in the check for
- * retryability.
- */
- function getEffectiveWriteErrorCode(res) {
- let code;
- if (res instanceof WriteResult) {
- if (res.hasWriteError()) {
- code = res.getWriteError().code;
- } else if (res.hasWriteConcernError()) {
- code = res.getWriteConcernError().code;
- }
- } else if (res instanceof BulkWriteResult) {
- if (res.hasWriteErrors()) {
- code = res.getWriteErrorAt(0).code;
- } else if (res.hasWriteConcernError()) {
- code = res.getWriteConcernError().code;
- }
- } else {
- if (res.writeError) {
- code = res.writeError.code;
- } else if (res.writeErrors) {
- code = res.writeErrors[0].code;
- } else if (res.writeConcernError) {
- code = res.writeConcernError.code;
- }
- }
- return code;
+ ErrorCodes.isShutdownError(code) || ErrorCodes.WriteConcernFailed === code;
}
function runClientFunctionWithRetries(
@@ -362,37 +336,10 @@ var {
}
do {
- try {
- let res = clientFunction.apply(client, clientFunctionArguments);
+ let res;
- if (numRetries > 0) {
- if (!res.ok && isRetryableCode(res.code)) {
- // Don't decrement retries, because the command returned before the
- // connection was closed, so a subsequent attempt will receive a network
- // error (or NotMaster error) and need to retry.
- if (jsTest.options().logRetryAttempts) {
- print("=-=-=-= Retrying failed response with retryable code: " +
- res.code + ", for command: " + cmdName +
- ", retries remaining: " + numRetries);
- }
- continue;
- }
-
- let code = getEffectiveWriteErrorCode(res);
- if (isRetryableCode(code)) {
- // Don't decrement retries, because the command returned before the
- // connection was closed, so a subsequent attempt will receive a network
- // error (or NotMaster error) and need to retry.
- if (jsTest.options().logRetryAttempts) {
- print("=-=-=-= Retrying write with retryable write error code: " +
- code + ", for command: " + cmdName + ", retries remaining: " +
- numRetries);
- }
- continue;
- }
- }
-
- return res;
+ try {
+ res = clientFunction.apply(client, clientFunctionArguments);
} catch (e) {
if (!isNetworkError(e) || numRetries === 0) {
throw e;
@@ -418,12 +365,69 @@ var {
}
}
- --numRetries;
- if (jsTest.options().logRetryAttempts) {
- print("=-=-=-= Retrying on network error for command: " + cmdName +
- ", retries remaining: " + numRetries);
+ if (numRetries > 0) {
+ --numRetries;
+
+ if (res === undefined) {
+ if (jsTest.options().logRetryAttempts) {
+ jsTest.log("Retrying " + cmdName +
+ " due to network error, subsequent retries remaining: " +
+ numRetries);
+ }
+ continue;
+ }
+
+ if (isRetryableCode(res.code)) {
+ if (jsTest.options().logRetryAttempts) {
+ jsTest.log("Retrying " + cmdName + " due to retryable error (code=" +
+ res.code + "), subsequent retries remaining: " + numRetries);
+ }
+ if (client.isReplicaSetConnection()) {
+ client._markNodeAsFailed(res._mongo.host, res.code, res.errmsg);
+ }
+ continue;
+ }
+
+ if (Array.isArray(res.writeErrors)) {
+ // If any of the write operations in the batch fails with a retryable error,
+ // then we retry the entire batch.
+ const writeError =
+ res.writeErrors.find((writeError) => isRetryableCode(writeError.code));
+
+ if (writeError !== undefined) {
+ if (jsTest.options().logRetryAttempts) {
+ jsTest.log("Retrying " + cmdName +
+ " due to retryable write error (code=" +
+ writeError.code + "), subsequent retries remaining: " +
+ numRetries);
+ }
+ if (client.isReplicaSetConnection()) {
+ client._markNodeAsFailed(
+ res._mongo.host, writeError.code, writeError.errmsg);
+ }
+ continue;
+ }
+ }
+
+ if (res.hasOwnProperty("writeConcernError") &&
+ isRetryableCode(res.writeConcernError.code)) {
+ if (jsTest.options().logRetryAttempts) {
+ jsTest.log("Retrying " + cmdName +
+ " due to retryable write concern error (code=" +
+ res.writeConcernError.code +
+ "), subsequent retries remaining: " + numRetries);
+ }
+ if (client.isReplicaSetConnection()) {
+ client._markNodeAsFailed(res._mongo.host,
+ res.writeConcernError.code,
+ res.writeConcernError.errmsg);
+ }
+ continue;
+ }
}
- } while (numRetries >= 0);
+
+ return res;
+ } while (true);
}
this.runCommand = function runCommand(driverSession, dbName, cmdObj, options) {