diff options
author | Lingzhi Deng <lingzhi.deng@mongodb.com> | 2020-04-17 12:02:58 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-04-17 21:41:39 +0000 |
commit | 6bcda378bfa25ae70ac15508c171e5eeb2269958 (patch) | |
tree | 62d6276951f95034c69e40539b02fd1966f38661 | |
parent | 463cc815528b42c3c2f81dc7137cc27f7e7cc974 (diff) | |
download | mongo-6bcda378bfa25ae70ac15508c171e5eeb2269958.tar.gz |
SERVER-47062: Trigger hang analyzer for writeConcern timeout errors
-rw-r--r-- | jstests/noPassthrough/shell_cmd_assertions.js | 48 | ||||
-rw-r--r-- | jstests/replsets/agg_write_concern_zero_batch_size.js | 97 | ||||
-rw-r--r-- | src/mongo/shell/assert.js | 23 |
3 files changed, 124 insertions, 44 deletions
diff --git a/jstests/noPassthrough/shell_cmd_assertions.js b/jstests/noPassthrough/shell_cmd_assertions.js index 4072ca49965..1bb358dfce0 100644 --- a/jstests/noPassthrough/shell_cmd_assertions.js +++ b/jstests/noPassthrough/shell_cmd_assertions.js @@ -330,6 +330,54 @@ tests.push(function invalidResponsesAttemptToProvideInformationCommandFailed() { }); }); +tests.push(function assertCallsHangAnalyzer() { + function runAssertTest(f, expectCall) { + const oldMongoRunner = MongoRunner; + let runs = 0; + try { + MongoRunner.runHangAnalyzer = function() { + ++runs; + }; + f(); + assert(false); + } catch (e) { + if (expectCall) { + assert.eq(runs, 1); + } else { + assert.eq(runs, 0); + } + } finally { + MongoRunner = oldMongoRunner; + } + } + const nonTimeOutWriteConcernError = { + n: 1, + ok: 1, + writeConcernError: { + code: ErrorCodes.WriteConcernFailed, + codeName: "WriteConcernFailed", + errmsg: "foo", + }, + }; + + runAssertTest(() => assert.commandWorked(sampleWriteConcernError), true); + runAssertTest(() => assert.commandWorked(nonTimeOutWriteConcernError), false); + + runAssertTest(() => assert.commandFailed(sampleWriteConcernError), false); + + runAssertTest( + () => assert.commandFailedWithCode(sampleWriteConcernError, ErrorCodes.DuplicateKey), true); + runAssertTest( + () => assert.commandFailedWithCode(nonTimeOutWriteConcernError, ErrorCodes.DuplicateKey), + false); + runAssertTest( + () => assert.commandFailedWithCode(sampleWriteConcernError, ErrorCodes.WriteConcernFailed), + false); + + runAssertTest(() => assert.commandWorkedIgnoringWriteConcernErrors(sampleWriteConcernError), + false); +}); + tests.forEach((test) => { jsTest.log(`Starting test '${test.name}'`); setup(); diff --git a/jstests/replsets/agg_write_concern_zero_batch_size.js b/jstests/replsets/agg_write_concern_zero_batch_size.js index 7e9d91c70c7..4119a73d5bd 100644 --- a/jstests/replsets/agg_write_concern_zero_batch_size.js +++ b/jstests/replsets/agg_write_concern_zero_batch_size.js @@ -17,57 +17,66 @@ const source = testDB.agg_write_concern_zero_batch_size; const target = testDB.agg_write_concern_zero_batch_size_target; assert.commandWorked(source.insert([{_id: 0}, {_id: 1}, {_id: 2}])); -withEachKindOfWriteStage(target, (stageSpec) => { - assert.commandWorked(target.remove({})); +// This test will cause commands to fail with writeConcern timeout. This normally triggers the +// hang analyzer, but we do not want to run it on expected timeouts. Thus we temporarily disable +// it. +MongoRunner.runHangAnalyzer.disable(); - // Start an aggregate cursor with a writing stage, but use batchSize: 0 to prevent any - // writes from happening in this command. - const response = assert.commandWorked(testDB.runCommand({ - aggregate: source.getName(), - pipeline: [stageSpec], - writeConcern: {w: 2, wtimeout: 100}, - cursor: {batchSize: 0} - })); - assert.neq(response.cursor.id, 0); +try { + withEachKindOfWriteStage(target, (stageSpec) => { + assert.commandWorked(target.remove({})); - stopServerReplication(rst.getSecondary()); + // Start an aggregate cursor with a writing stage, but use batchSize: 0 to prevent any + // writes from happening in this command. + const response = assert.commandWorked(testDB.runCommand({ + aggregate: source.getName(), + pipeline: [stageSpec], + writeConcern: {w: 2, wtimeout: 100}, + cursor: {batchSize: 0} + })); + assert.neq(response.cursor.id, 0); - const getMoreResponse = assert.commandFailedWithCode( - testDB.runCommand({getMore: response.cursor.id, collection: source.getName()}), - ErrorCodes.WriteConcernFailed); + stopServerReplication(rst.getSecondary()); - // Test the same thing but using the shell helpers. - let error = assert.throws( - () => source - .aggregate([stageSpec], - {cursor: {batchSize: 0}, writeConcern: {w: 2, wtimeout: 100}}) - .itcount()); - // Unfortunately this is the best way we have to check that the cause of the failure was due - // to write concern. The aggregate shell helper will assert the command worked. When this - // fails (as we expect due to write concern) it will create a new error object which loses - // all structure and just preserves the information as text. - assert(error instanceof Error); - assert(tojson(error).indexOf("writeConcernError") != -1, tojson(error)); + const getMoreResponse = assert.commandFailedWithCode( + testDB.runCommand({getMore: response.cursor.id, collection: source.getName()}), + ErrorCodes.WriteConcernFailed); - // Now test without batchSize just to be sure. - error = - assert.throws(() => source.aggregate([stageSpec], {writeConcern: {w: 2, wtimeout: 100}})); - assert(error instanceof Error); - assert(tojson(error).indexOf("writeConcernError") != -1, tojson(error)); + // Test the same thing but using the shell helpers. + let error = assert.throws( + () => source + .aggregate([stageSpec], + {cursor: {batchSize: 0}, writeConcern: {w: 2, wtimeout: 100}}) + .itcount()); + // Unfortunately this is the best way we have to check that the cause of the failure was due + // to write concern. The aggregate shell helper will assert the command worked. When this + // fails (as we expect due to write concern) it will create a new error object which loses + // all structure and just preserves the information as text. + assert(error instanceof Error); + assert(tojson(error).indexOf("writeConcernError") != -1, tojson(error)); - // Now switch to legacy OP_GET_MORE read mode. We should get a different error indicating - // that using writeConcern in this way is unsupported. - source.getDB().getMongo().forceReadMode("legacy"); - error = assert.throws( - () => source - .aggregate([stageSpec], - {cursor: {batchSize: 0}, writeConcern: {w: 2, wtimeout: 100}}) - .itcount()); - assert.eq(error.code, 31124); - source.getDB().getMongo().forceReadMode("commands"); + // Now test without batchSize just to be sure. + error = assert.throws( + () => source.aggregate([stageSpec], {writeConcern: {w: 2, wtimeout: 100}})); + assert(error instanceof Error); + assert(tojson(error).indexOf("writeConcernError") != -1, tojson(error)); - restartServerReplication(rst.getSecondary()); -}); + // Now switch to legacy OP_GET_MORE read mode. We should get a different error indicating + // that using writeConcern in this way is unsupported. + source.getDB().getMongo().forceReadMode("legacy"); + error = assert.throws( + () => source + .aggregate([stageSpec], + {cursor: {batchSize: 0}, writeConcern: {w: 2, wtimeout: 100}}) + .itcount()); + assert.eq(error.code, 31124); + source.getDB().getMongo().forceReadMode("commands"); + + restartServerReplication(rst.getSecondary()); + }); +} finally { + MongoRunner.runHangAnalyzer.enable(); +} rst.stopSet(); }()); diff --git a/src/mongo/shell/assert.js b/src/mongo/shell/assert.js index 1c0724ce3cb..91edc460e5a 100644 --- a/src/mongo/shell/assert.js +++ b/src/mongo/shell/assert.js @@ -615,6 +615,24 @@ assert = (function() { } } + function _runHangAnalyzerIfWriteConcernTimedOut(res) { + const timeoutMsg = "waiting for replication timed out"; + let isWriteConcernTimeout = false; + if (_isWriteResultType(res)) { + if (res.hasWriteConcernError() && res.getWriteConcernError().errmsg === timeoutMsg) { + isWriteConcernTimeout = true; + } + } else if ((res.hasOwnProperty("errmsg") && res.errmsg === timeoutMsg) || + (res.hasOwnProperty("writeConcernError") && + res.writeConcernError.errmsg === timeoutMsg)) { + isWriteConcernTimeout = true; + } + if (isWriteConcernTimeout) { + print("Running hang analyzer for writeConcern timeout " + tojson(res)); + MongoRunner.runHangAnalyzer(); + } + } + function _assertCommandWorked(res, msg, {ignoreWriteErrors, ignoreWriteConcernErrors}) { _validateAssertionMessage(msg); _validateCommandResponse(res, "commandWorked"); @@ -641,6 +659,7 @@ assert = (function() { ignoreWriteErrors: ignoreWriteErrors, ignoreWriteConcernErrors: ignoreWriteConcernErrors })) { + _runHangAnalyzerIfWriteConcernTimedOut(res); doassert(makeFailMsg(), res); } } else if (res.hasOwnProperty("acknowledged")) { @@ -693,6 +712,7 @@ assert = (function() { // Handle raw command responses or cases like MapReduceResult which extend command // response. if (_rawReplyOkAndNoWriteErrors(res)) { + _runHangAnalyzerIfWriteConcernTimedOut(res); doassert(makeFailMsg(), res); } @@ -707,6 +727,7 @@ assert = (function() { } if (!foundCode) { + _runHangAnalyzerIfWriteConcernTimedOut(res); doassert(makeFailCodeMsg(), res); } } @@ -782,6 +803,7 @@ assert = (function() { } if (errMsg) { + _runHangAnalyzerIfWriteConcernTimedOut(res); doassert(_buildAssertionMessage(msg, errMsg), res); } @@ -843,6 +865,7 @@ assert = (function() { } if (errMsg) { + _runHangAnalyzerIfWriteConcernTimedOut(res); doassert(_buildAssertionMessage(msg, errMsg)); } |