summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLingzhi Deng <lingzhi.deng@mongodb.com>2020-04-17 12:02:58 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-04-17 21:41:39 +0000
commit6bcda378bfa25ae70ac15508c171e5eeb2269958 (patch)
tree62d6276951f95034c69e40539b02fd1966f38661
parent463cc815528b42c3c2f81dc7137cc27f7e7cc974 (diff)
downloadmongo-6bcda378bfa25ae70ac15508c171e5eeb2269958.tar.gz
SERVER-47062: Trigger hang analyzer for writeConcern timeout errors
-rw-r--r--jstests/noPassthrough/shell_cmd_assertions.js48
-rw-r--r--jstests/replsets/agg_write_concern_zero_batch_size.js97
-rw-r--r--src/mongo/shell/assert.js23
3 files changed, 124 insertions, 44 deletions
diff --git a/jstests/noPassthrough/shell_cmd_assertions.js b/jstests/noPassthrough/shell_cmd_assertions.js
index 4072ca49965..1bb358dfce0 100644
--- a/jstests/noPassthrough/shell_cmd_assertions.js
+++ b/jstests/noPassthrough/shell_cmd_assertions.js
@@ -330,6 +330,54 @@ tests.push(function invalidResponsesAttemptToProvideInformationCommandFailed() {
});
});
+tests.push(function assertCallsHangAnalyzer() {
+ function runAssertTest(f, expectCall) {
+ const oldMongoRunner = MongoRunner;
+ let runs = 0;
+ try {
+ MongoRunner.runHangAnalyzer = function() {
+ ++runs;
+ };
+ f();
+ assert(false);
+ } catch (e) {
+ if (expectCall) {
+ assert.eq(runs, 1);
+ } else {
+ assert.eq(runs, 0);
+ }
+ } finally {
+ MongoRunner = oldMongoRunner;
+ }
+ }
+ const nonTimeOutWriteConcernError = {
+ n: 1,
+ ok: 1,
+ writeConcernError: {
+ code: ErrorCodes.WriteConcernFailed,
+ codeName: "WriteConcernFailed",
+ errmsg: "foo",
+ },
+ };
+
+ runAssertTest(() => assert.commandWorked(sampleWriteConcernError), true);
+ runAssertTest(() => assert.commandWorked(nonTimeOutWriteConcernError), false);
+
+ runAssertTest(() => assert.commandFailed(sampleWriteConcernError), false);
+
+ runAssertTest(
+ () => assert.commandFailedWithCode(sampleWriteConcernError, ErrorCodes.DuplicateKey), true);
+ runAssertTest(
+ () => assert.commandFailedWithCode(nonTimeOutWriteConcernError, ErrorCodes.DuplicateKey),
+ false);
+ runAssertTest(
+ () => assert.commandFailedWithCode(sampleWriteConcernError, ErrorCodes.WriteConcernFailed),
+ false);
+
+ runAssertTest(() => assert.commandWorkedIgnoringWriteConcernErrors(sampleWriteConcernError),
+ false);
+});
+
tests.forEach((test) => {
jsTest.log(`Starting test '${test.name}'`);
setup();
diff --git a/jstests/replsets/agg_write_concern_zero_batch_size.js b/jstests/replsets/agg_write_concern_zero_batch_size.js
index 7e9d91c70c7..4119a73d5bd 100644
--- a/jstests/replsets/agg_write_concern_zero_batch_size.js
+++ b/jstests/replsets/agg_write_concern_zero_batch_size.js
@@ -17,57 +17,66 @@ const source = testDB.agg_write_concern_zero_batch_size;
const target = testDB.agg_write_concern_zero_batch_size_target;
assert.commandWorked(source.insert([{_id: 0}, {_id: 1}, {_id: 2}]));
-withEachKindOfWriteStage(target, (stageSpec) => {
- assert.commandWorked(target.remove({}));
+// This test will cause commands to fail with writeConcern timeout. This normally triggers the
+// hang analyzer, but we do not want to run it on expected timeouts. Thus we temporarily disable
+// it.
+MongoRunner.runHangAnalyzer.disable();
- // Start an aggregate cursor with a writing stage, but use batchSize: 0 to prevent any
- // writes from happening in this command.
- const response = assert.commandWorked(testDB.runCommand({
- aggregate: source.getName(),
- pipeline: [stageSpec],
- writeConcern: {w: 2, wtimeout: 100},
- cursor: {batchSize: 0}
- }));
- assert.neq(response.cursor.id, 0);
+try {
+ withEachKindOfWriteStage(target, (stageSpec) => {
+ assert.commandWorked(target.remove({}));
- stopServerReplication(rst.getSecondary());
+ // Start an aggregate cursor with a writing stage, but use batchSize: 0 to prevent any
+ // writes from happening in this command.
+ const response = assert.commandWorked(testDB.runCommand({
+ aggregate: source.getName(),
+ pipeline: [stageSpec],
+ writeConcern: {w: 2, wtimeout: 100},
+ cursor: {batchSize: 0}
+ }));
+ assert.neq(response.cursor.id, 0);
- const getMoreResponse = assert.commandFailedWithCode(
- testDB.runCommand({getMore: response.cursor.id, collection: source.getName()}),
- ErrorCodes.WriteConcernFailed);
+ stopServerReplication(rst.getSecondary());
- // Test the same thing but using the shell helpers.
- let error = assert.throws(
- () => source
- .aggregate([stageSpec],
- {cursor: {batchSize: 0}, writeConcern: {w: 2, wtimeout: 100}})
- .itcount());
- // Unfortunately this is the best way we have to check that the cause of the failure was due
- // to write concern. The aggregate shell helper will assert the command worked. When this
- // fails (as we expect due to write concern) it will create a new error object which loses
- // all structure and just preserves the information as text.
- assert(error instanceof Error);
- assert(tojson(error).indexOf("writeConcernError") != -1, tojson(error));
+ const getMoreResponse = assert.commandFailedWithCode(
+ testDB.runCommand({getMore: response.cursor.id, collection: source.getName()}),
+ ErrorCodes.WriteConcernFailed);
- // Now test without batchSize just to be sure.
- error =
- assert.throws(() => source.aggregate([stageSpec], {writeConcern: {w: 2, wtimeout: 100}}));
- assert(error instanceof Error);
- assert(tojson(error).indexOf("writeConcernError") != -1, tojson(error));
+ // Test the same thing but using the shell helpers.
+ let error = assert.throws(
+ () => source
+ .aggregate([stageSpec],
+ {cursor: {batchSize: 0}, writeConcern: {w: 2, wtimeout: 100}})
+ .itcount());
+ // Unfortunately this is the best way we have to check that the cause of the failure was due
+ // to write concern. The aggregate shell helper will assert the command worked. When this
+ // fails (as we expect due to write concern) it will create a new error object which loses
+ // all structure and just preserves the information as text.
+ assert(error instanceof Error);
+ assert(tojson(error).indexOf("writeConcernError") != -1, tojson(error));
- // Now switch to legacy OP_GET_MORE read mode. We should get a different error indicating
- // that using writeConcern in this way is unsupported.
- source.getDB().getMongo().forceReadMode("legacy");
- error = assert.throws(
- () => source
- .aggregate([stageSpec],
- {cursor: {batchSize: 0}, writeConcern: {w: 2, wtimeout: 100}})
- .itcount());
- assert.eq(error.code, 31124);
- source.getDB().getMongo().forceReadMode("commands");
+ // Now test without batchSize just to be sure.
+ error = assert.throws(
+ () => source.aggregate([stageSpec], {writeConcern: {w: 2, wtimeout: 100}}));
+ assert(error instanceof Error);
+ assert(tojson(error).indexOf("writeConcernError") != -1, tojson(error));
- restartServerReplication(rst.getSecondary());
-});
+ // Now switch to legacy OP_GET_MORE read mode. We should get a different error indicating
+ // that using writeConcern in this way is unsupported.
+ source.getDB().getMongo().forceReadMode("legacy");
+ error = assert.throws(
+ () => source
+ .aggregate([stageSpec],
+ {cursor: {batchSize: 0}, writeConcern: {w: 2, wtimeout: 100}})
+ .itcount());
+ assert.eq(error.code, 31124);
+ source.getDB().getMongo().forceReadMode("commands");
+
+ restartServerReplication(rst.getSecondary());
+ });
+} finally {
+ MongoRunner.runHangAnalyzer.enable();
+}
rst.stopSet();
}());
diff --git a/src/mongo/shell/assert.js b/src/mongo/shell/assert.js
index 1c0724ce3cb..91edc460e5a 100644
--- a/src/mongo/shell/assert.js
+++ b/src/mongo/shell/assert.js
@@ -615,6 +615,24 @@ assert = (function() {
}
}
+ function _runHangAnalyzerIfWriteConcernTimedOut(res) {
+ const timeoutMsg = "waiting for replication timed out";
+ let isWriteConcernTimeout = false;
+ if (_isWriteResultType(res)) {
+ if (res.hasWriteConcernError() && res.getWriteConcernError().errmsg === timeoutMsg) {
+ isWriteConcernTimeout = true;
+ }
+ } else if ((res.hasOwnProperty("errmsg") && res.errmsg === timeoutMsg) ||
+ (res.hasOwnProperty("writeConcernError") &&
+ res.writeConcernError.errmsg === timeoutMsg)) {
+ isWriteConcernTimeout = true;
+ }
+ if (isWriteConcernTimeout) {
+ print("Running hang analyzer for writeConcern timeout " + tojson(res));
+ MongoRunner.runHangAnalyzer();
+ }
+ }
+
function _assertCommandWorked(res, msg, {ignoreWriteErrors, ignoreWriteConcernErrors}) {
_validateAssertionMessage(msg);
_validateCommandResponse(res, "commandWorked");
@@ -641,6 +659,7 @@ assert = (function() {
ignoreWriteErrors: ignoreWriteErrors,
ignoreWriteConcernErrors: ignoreWriteConcernErrors
})) {
+ _runHangAnalyzerIfWriteConcernTimedOut(res);
doassert(makeFailMsg(), res);
}
} else if (res.hasOwnProperty("acknowledged")) {
@@ -693,6 +712,7 @@ assert = (function() {
// Handle raw command responses or cases like MapReduceResult which extend command
// response.
if (_rawReplyOkAndNoWriteErrors(res)) {
+ _runHangAnalyzerIfWriteConcernTimedOut(res);
doassert(makeFailMsg(), res);
}
@@ -707,6 +727,7 @@ assert = (function() {
}
if (!foundCode) {
+ _runHangAnalyzerIfWriteConcernTimedOut(res);
doassert(makeFailCodeMsg(), res);
}
}
@@ -782,6 +803,7 @@ assert = (function() {
}
if (errMsg) {
+ _runHangAnalyzerIfWriteConcernTimedOut(res);
doassert(_buildAssertionMessage(msg, errMsg), res);
}
@@ -843,6 +865,7 @@ assert = (function() {
}
if (errMsg) {
+ _runHangAnalyzerIfWriteConcernTimedOut(res);
doassert(_buildAssertionMessage(msg, errMsg));
}