diff options
author | Vesselina Ratcheva <vesselina.ratcheva@10gen.com> | 2020-02-27 03:03:47 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-02-27 09:19:44 +0000 |
commit | b4f0cfe1d7fc292728f27edf7b3eee90ba5fd5b4 (patch) | |
tree | 3ae20abe90c6c81c102a99de2f81bef6a844d367 | |
parent | fcd2dd41189fffc6e67a8645b99974178f87ca04 (diff) | |
download | mongo-b4f0cfe1d7fc292728f27edf7b3eee90ba5fd5b4.tar.gz |
SERVER-43867 Work around unrecoverability of rollback via refetch in tests SERVER-43867 Move check_log inside src/mongo/shell/
(cherry picked from commit 7c42573b96ecfa43fbf76aa01cd659ee00fc6103)
(cherry picked from commit 9e69da32cea83fc45724213215a8de40d2635f45)
-rw-r--r-- | jstests/replsets/libs/rollback_test.js | 31 | ||||
-rw-r--r-- | jstests/replsets/unrecoverable_rollback_early_exit.js | 66 | ||||
-rw-r--r-- | src/mongo/scripting/engine.cpp | 2 | ||||
-rw-r--r-- | src/mongo/shell/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/shell/check_log.js | 120 | ||||
-rw-r--r-- | src/mongo/shell/replsettest.js | 45 |
6 files changed, 264 insertions, 1 deletions
diff --git a/jstests/replsets/libs/rollback_test.js b/jstests/replsets/libs/rollback_test.js index ce026c6a1f5..ed20b18bb3b 100644 --- a/jstests/replsets/libs/rollback_test.js +++ b/jstests/replsets/libs/rollback_test.js @@ -61,6 +61,8 @@ function RollbackTest(name = "RollbackTest", replSet) { const SIGTERM = 15; const kNumDataBearingNodes = 2; + let awaitSecondaryNodesForRollbackTimeout; + let rst; let curPrimary; let curSecondary; @@ -213,7 +215,25 @@ function RollbackTest(name = "RollbackTest", replSet) { `may prevent a rollback here.`); } - rst.awaitSecondaryNodes(); + // If the rollback node has {enableMajorityReadConcern:false} set, it will use the + // rollbackViaRefetch algorithm. That can lead to unrecoverable rollbacks, particularly + // in unclean shutdown suites, as it it is possible in rare cases for the sync source to + // lose the entry corresponding to the optime the rollback node chose as its minValid. + try { + rst.awaitSecondaryNodesForRollbackTest( + awaitSecondaryNodesForRollbackTimeout, + curSecondary /* connToCheckForUnrecoverableRollback */); + } catch (e) { + if (e.unrecoverableRollbackDetected) { + log(`Detected unrecoverable rollback on ${curSecondary.host}. Ending test.`, + true /* important */); + TestData.skipCheckDBHashes = true; + rst.stopSet(); + quit(); + } + // Re-throw the original exception in all other cases. + throw e; + } rst.awaitReplication(); log(`Rollback on ${curSecondary.host} (if needed) and awaitReplication completed`, true); @@ -391,4 +411,13 @@ function RollbackTest(name = "RollbackTest", replSet) { this.getTestFixture = function() { return rst; }; + + /** + * Use this to control the timeout being used in the awaitSecondaryNodesForRollbackTest call + * in transitionToSteadyStateOperations. + * For use only in tests that expect unrecoverable rollbacks. + */ + this.setAwaitSecondaryNodesForRollbackTimeout = function(timeoutMillis) { + awaitSecondaryNodesForRollbackTimeout = timeoutMillis; + }; } diff --git a/jstests/replsets/unrecoverable_rollback_early_exit.js b/jstests/replsets/unrecoverable_rollback_early_exit.js new file mode 100644 index 00000000000..d729b34dd0c --- /dev/null +++ b/jstests/replsets/unrecoverable_rollback_early_exit.js @@ -0,0 +1,66 @@ +/** + * This test exercises an unrecoverable rollback using rollback_test.js, expecting it to terminate + * cleanly early instead of failing. + * An unrecoverable rollback can happen with EMRC:false, as it is possible for rollback via refetch + * to set a minValid based on oplog entries that the sync source may have failed to recover after + * an unclean shutdown. The rollback node will need to sync and apply oplog entries up to minValid + * to be consistent, but if those oplog entries no longer exist, then it will be stuck in sync + * source selection and unable to complete recovery. + * This test reproduces this scenario in a simpler form by modifying the minValid on the rollback + * node very far forward, so that we do not have to simulate anything happening to the sync source. + */ + +(function() { + "use strict"; + + load("jstests/replsets/libs/rollback_test.js"); + + function tsToDate(ts) { + return new Date(ts.getTime() * 1000); + } + + const testName = jsTestName(); + + const rst = new ReplSetTest({ + name: testName, + nodes: [{}, {}, {rsConfig: {arbiterOnly: true}}], + useBridge: true, + settings: {chainingAllowed: false}, + nodeOptions: {enableMajorityReadConcern: "false"} + }); + rst.startSet(); + rst.initiate(); + + const rollbackTest = new RollbackTest(testName, rst); + const rollbackNode = rollbackTest.transitionToRollbackOperations(); + + assert.commandWorked(rollbackNode.adminCommand( + {configureFailPoint: "rollbackHangBeforeFinish", mode: "alwaysOn"})); + + rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); + rollbackTest.transitionToSyncSourceOperationsDuringRollback(); + + checkLog.contains(rollbackNode, "rollbackHangBeforeFinish fail point enabled."); + + const farFutureTS = new Timestamp( + Math.floor(new Date().getTime() / 1000) + (60 * 60 * 24 * 5 /* in five days*/), 0); + + jsTest.log("future TS: " + tojson(farFutureTS) + ", date:" + tsToDate(farFutureTS)); + + const mMinvalid = rollbackNode.getDB("local").getCollection("replset.minvalid"); + + const minValidUpdate = {$set: {ts: farFutureTS}}; + jsTestLog("Current minValid is " + tojson(mMinvalid.findOne())); + jsTestLog("Updating minValid to: " + tojson(minValidUpdate)); + printjson(assert.commandWorked(mMinvalid.update({}, minValidUpdate))); + + assert.commandWorked( + rollbackNode.adminCommand({configureFailPoint: "rollbackHangBeforeFinish", mode: "off"})); + + rollbackTest.setAwaitSecondaryNodesForRollbackTimeout(5 * 1000); + + // We will detect an unrecoverable rollback here. + rollbackTest.transitionToSteadyStateOperations(); + + rollbackTest.stop(); +})();
\ No newline at end of file diff --git a/src/mongo/scripting/engine.cpp b/src/mongo/scripting/engine.cpp index 6ecabd1fd8d..952fd67ac9d 100644 --- a/src/mongo/scripting/engine.cpp +++ b/src/mongo/scripting/engine.cpp @@ -293,6 +293,7 @@ ScriptingFunction Scope::createFunction(const char* code) { } namespace JSFiles { +extern const JSFile check_log; extern const JSFile collection; extern const JSFile crud_api; extern const JSFile db; @@ -320,6 +321,7 @@ void Scope::execCoreFiles() { execSetup(JSFiles::query); execSetup(JSFiles::bulk_api); execSetup(JSFiles::error_codes); + execSetup(JSFiles::check_log); execSetup(JSFiles::collection); execSetup(JSFiles::crud_api); execSetup(JSFiles::explain_query); diff --git a/src/mongo/shell/SConscript b/src/mongo/shell/SConscript index 79e956ab06e..e534658b9ae 100644 --- a/src/mongo/shell/SConscript +++ b/src/mongo/shell/SConscript @@ -33,6 +33,7 @@ js_header = env.JSHeader( source=[ "assert.js", "bulk_api.js", + "check_log.js", "collection.js", "crud_api.js", "db.js", diff --git a/src/mongo/shell/check_log.js b/src/mongo/shell/check_log.js new file mode 100644 index 00000000000..d49955f5786 --- /dev/null +++ b/src/mongo/shell/check_log.js @@ -0,0 +1,120 @@ +/* + * Helper functions which connect to a server, and check its logs for particular strings. + */ +var checkLog; + +(function() { + "use strict"; + + if (checkLog) { + return; // Protect against this file being double-loaded. + } + + checkLog = (function() { + var getGlobalLog = function(conn) { + var cmdRes; + try { + cmdRes = conn.adminCommand({getLog: 'global'}); + } catch (e) { + // Retry with network errors. + print("checkLog ignoring failure: " + e); + return null; + } + + return assert.commandWorked(cmdRes).log; + }; + + /* + * Calls the 'getLog' function on the provided connection 'conn' to see if the provided msg + * is found in the logs. Note: this function does not throw an exception, so the return + * value should not be ignored. + */ + const checkContainsOnce = function(conn, msg) { + const logMessages = getGlobalLog(conn); + if (logMessages === null) { + return false; + } + for (let logMsg of logMessages) { + if (logMsg.includes(msg)) { + return true; + } + } + return false; + }; + + /* + * Calls the 'getLog' function at regular intervals on the provided connection 'conn' until + * the provided 'msg' is found in the logs, or it times out. Throws an exception on timeout. + */ + var contains = function(conn, msg, timeout = 5 * 60 * 1000) { + assert.soon(function() { + return checkContainsOnce(conn, msg); + }, 'Could not find log entries containing the following message: ' + msg, timeout, 300); + }; + + /* + * Calls the 'getLog' function at regular intervals on the provided connection 'conn' until + * the provided 'msg' is found in the logs exactly 'expectedCount' times, or it times out. + * Throws an exception on timeout. + */ + var containsWithCount = function(conn, msg, expectedCount, timeout = 5 * 60 * 1000) { + var count = 0; + assert.soon( + function() { + count = 0; + var logMessages = getGlobalLog(conn); + if (logMessages === null) { + return false; + } + for (var i = 0; i < logMessages.length; i++) { + if (logMessages[i].indexOf(msg) != -1) { + count++; + } + } + + return expectedCount === count; + }, + 'Expected ' + expectedCount + ', but instead saw ' + count + + ' log entries containing the following message: ' + msg, + timeout, + 300); + }; + + /* + * Converts a scalar or object to a string format suitable for matching against log output. + * Field names are not quoted, and by default strings which are not within an enclosing + * object are not escaped. Similarly, integer values without an enclosing object are + * serialized as integers, while those within an object are serialized as floats to one + * decimal point. NumberLongs are unwrapped prior to serialization. + */ + const formatAsLogLine = function(value, escapeStrings, toDecimal) { + if (typeof value === "string") { + return (escapeStrings ? `"${value}"` : value); + } else if (typeof value === "number") { + return (Number.isInteger(value) && toDecimal ? value.toFixed(1) : value); + } else if (value instanceof NumberLong) { + return `${value}`.match(/NumberLong..(.*)../m)[1]; + } else if (typeof value !== "object") { + return value; + } else if (Object.keys(value).length === 0) { + return Array.isArray(value) ? "[]" : "{}"; + } + let serialized = []; + escapeStrings = toDecimal = true; + for (let fieldName in value) { + const valueStr = formatAsLogLine(value[fieldName], escapeStrings, toDecimal); + serialized.push(Array.isArray(value) ? valueStr : `${fieldName}: ${valueStr}`); + } + return (Array.isArray(value) ? `[ ${serialized.join(', ')} ]` + : `{ ${serialized.join(', ')} }`); + }; + + return { + getGlobalLog: getGlobalLog, + checkContainsOnce: checkContainsOnce, + contains: contains, + containsWithCount: containsWithCount, + formatAsLogLine: formatAsLogLine + }; + })(); +})(); diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js index dee9b4c75a0..05c4b584ff2 100644 --- a/src/mongo/shell/replsettest.js +++ b/src/mongo/shell/replsettest.js @@ -570,6 +570,51 @@ var ReplSetTest = function(opts) { }; /** + * A special version of awaitSecondaryNodes() used exclusively by rollback_test.js. + * Wraps around awaitSecondaryNodes() itself and checks for an unrecoverable rollback + * if it throws. + */ + this.awaitSecondaryNodesForRollbackTest = function(timeout, + connToCheckForUnrecoverableRollback) { + try { + this.awaitSecondaryNodes(timeout); + } catch (originalEx) { + // There is a special case where we expect the (rare) possibility of unrecoverable + // rollbacks with EMRC:false in rollback suites with unclean shutdowns. + jsTestLog("Exception in 'awaitSecondaryNodes', checking for unrecoverable rollback"); + if (connToCheckForUnrecoverableRollback) { + const conn = connToCheckForUnrecoverableRollback; + + const statusRes = assert.commandWorked(conn.adminCommand({replSetGetStatus: 1})); + const isRecovering = (statusRes.myState === ReplSetTest.State.RECOVERING); + const hasNoSyncSource = (statusRes.syncSourceId === -1); + + const cmdLineOptsRes = assert.commandWorked(conn.adminCommand("getCmdLineOpts")); + const hasEMRCFalse = + (cmdLineOptsRes.parsed.replication.enableMajorityReadConcern === false); + + if (isRecovering && hasNoSyncSource && hasEMRCFalse) { + try { + const n = this.getNodeId(conn); + const connToCheck = _useBridge ? _unbridgedNodes[n] : this.nodes[n]; + // Confirm that the node is unable to recover after rolling back. + checkLog.contains( + connToCheck, + "remote oplog does not contain entry with optime matching our required optime ", + 60 * 1000); + } catch (checkLogEx) { + throw originalEx; + } + // Add this info to the original exception. + originalEx.unrecoverableRollbackDetected = true; + } + } + // Re-throw the original exception in all cases. + throw originalEx; + } + }; + + /** * Blocks until the specified node says it's syncing from the given upstream node. */ this.awaitSyncSource = function(node, upstreamNode, timeout) { |