summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYu Jin Kang Park <yujin.kang@mongodb.com>2022-03-22 09:54:36 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-06-21 12:38:10 +0000
commit8107aae45548b6f3e823dea47896ba07fcced9d7 (patch)
treeec60d26ec9952a4e6591c9db5364efbe934d71d9
parentb208df54e42f1045b3c5d6ac28143f73e82bec0a (diff)
downloadmongo-8107aae45548b6f3e823dea47896ba07fcced9d7.tar.gz
SERVER-64244 Drop exceptions due to transient errors for whole runBackgroundDbCheck function
-rw-r--r--jstests/hooks/run_dbcheck_background.js185
-rw-r--r--src/mongo/shell/assert.js22
2 files changed, 115 insertions, 92 deletions
diff --git a/jstests/hooks/run_dbcheck_background.js b/jstests/hooks/run_dbcheck_background.js
index 660fe3321e6..383644bbf95 100644
--- a/jstests/hooks/run_dbcheck_background.js
+++ b/jstests/hooks/run_dbcheck_background.js
@@ -17,115 +17,116 @@ TestData = TestData || {};
const conn = db.getMongo();
const topology = DiscoverTopology.findConnectedNodes(conn);
-function runBackgroundDbCheck(hosts) {
- const quietly = (func) => {
- const printOriginal = print;
- try {
- print = Function.prototype;
- func();
- } finally {
- print = printOriginal;
- }
- };
+const exceptionFilteredBackgroundDbCheck = function(hosts) {
+ const runBackgroundDbCheck = function(hosts) {
+ const quietly = (func) => {
+ const printOriginal = print;
+ try {
+ print = Function.prototype;
+ func();
+ } finally {
+ print = printOriginal;
+ }
+ };
- let rst;
- // We construct the ReplSetTest instance with the print() function overridden to be a no-op
- // in order to suppress the log messages about the replica set configuration. The
- // run_dbcheck_background.js hook is executed frequently by resmoke.py and would
- // otherwise lead to generating an overwhelming amount of log messages.
- quietly(() => {
- rst = new ReplSetTest(hosts[0]);
- });
-
- const dbNames = new Set();
- const primary = rst.getPrimary();
-
- const version =
- assert
- .commandWorked(primary.adminCommand({getParameter: 1, featureCompatibilityVersion: 1}))
- .featureCompatibilityVersion.version;
- if (version != latestFCV) {
- print("Not running dbCheck in FCV " + version);
- return {ok: 1};
- }
+ let rst;
+ // We construct the ReplSetTest instance with the print() function overridden to be a no-op
+ // in order to suppress the log messages about the replica set configuration. The
+ // run_dbcheck_background.js hook is executed frequently by resmoke.py and would
+ // otherwise lead to generating an overwhelming amount of log messages.
+ quietly(() => {
+ rst = new ReplSetTest(hosts[0]);
+ });
- print("Running dbCheck for: " + rst.getURL());
+ const dbNames = new Set();
+ const primary = rst.getPrimary();
- const adminDb = primary.getDB('admin');
- let res = assert.commandWorked(adminDb.runCommand({listDatabases: 1, nameOnly: true}));
- for (let dbInfo of res.databases) {
- dbNames.add(dbInfo.name);
- }
+ const version = assert
+ .commandWorked(primary.adminCommand(
+ {getParameter: 1, featureCompatibilityVersion: 1}))
+ .featureCompatibilityVersion.version;
+ if (version != latestFCV) {
+ print("Not running dbCheck in FCV " + version);
+ return {ok: 1};
+ }
- // Transactions cannot be run on the following databases so we don't attempt to read at a
- // clusterTime on them either. (The "local" database is also not replicated.)
- // The config.transactions collection is different between primaries and secondaries.
- dbNames.delete('config');
- dbNames.delete('local');
+ print("Running dbCheck for: " + rst.getURL());
+
+ const adminDb = primary.getDB('admin');
+ let res = assert.commandWorked(adminDb.runCommand({listDatabases: 1, nameOnly: true}));
+ for (let dbInfo of res.databases) {
+ dbNames.add(dbInfo.name);
+ }
- dbNames.forEach((dbName) => {
- try {
+ // Transactions cannot be run on the following databases so we don't attempt to read at a
+ // clusterTime on them either. (The "local" database is also not replicated.)
+ // The config.transactions collection is different between primaries and secondaries.
+ dbNames.delete('config');
+ dbNames.delete('local');
+
+ dbNames.forEach((dbName) => {
assert.commandWorked(primary.getDB(dbName).runCommand({dbCheck: 1}));
jsTestLog("dbCheck done on database " + dbName);
- } catch (e) {
- if (e.code === ErrorCodes.NamespaceNotFound || e.code === ErrorCodes.LockTimeout ||
- e.code == ErrorCodes.Interrupted) {
- jsTestLog("Skipping dbCheck on database " + dbName +
- " due to transient error: " + tojson(e));
- return;
- } else {
- throw e;
- }
- }
- const dbCheckCompleted = (db) => {
- return db.currentOp().inprog.filter(x => x["desc"] == "dbCheck")[0] === undefined;
- };
+ const dbCheckCompleted = (db) => {
+ return db.currentOp({$all: true}).inprog.filter(x => x["desc"] === "dbCheck")[0] ===
+ undefined;
+ };
+
+ assert.soon(() => dbCheckCompleted(adminDb),
+ "timed out waiting for dbCheck to finish on database: " + dbName);
+ });
- assert.soon(() => dbCheckCompleted(adminDb),
- "timed out waiting for dbCheck to finish on database: " + dbName);
- });
-
- // Wait for all secondaries to finish applying all dbcheck batches.
- rst.awaitReplication();
-
- const nodes = [
- rst.getPrimary(),
- ...rst.getSecondaries().filter(conn => {
- return !conn.adminCommand({isMaster: 1}).arbiterOnly;
- })
- ];
- nodes.forEach((node) => {
- // Assert no errors (i.e., found inconsistencies). Allow warnings. Tolerate SnapshotTooOld
- // errors, as they can occur if the primary is slow enough processing a batch that the
- // secondary is unable to obtain the timestamp the primary used.
- const healthlog = node.getDB('local').system.healthlog;
- // Regex matching strings that start without "SnapshotTooOld"
- const regexStringWithoutSnapTooOld = /^((?!^SnapshotTooOld).)*$/;
- let errs =
- healthlog.find({"severity": "error", "data.error": regexStringWithoutSnapTooOld});
- if (errs.hasNext()) {
- const err = "dbCheck found inconsistency on " + node.host;
- jsTestLog(err + ". Errors: ");
- for (let count = 0; errs.hasNext() && count < 20; count++) {
- jsTestLog(tojson(errs.next()));
+ // Wait for all secondaries to finish applying all dbcheck batches.
+ rst.awaitReplication();
+
+ const nodes = [
+ rst.getPrimary(),
+ ...rst.getSecondaries().filter(conn => {
+ return !conn.adminCommand({isMaster: 1}).arbiterOnly;
+ })
+ ];
+ nodes.forEach((node) => {
+ // Assert no errors (i.e., found inconsistencies). Allow warnings. Tolerate
+ // SnapshotTooOld errors, as they can occur if the primary is slow enough processing a
+ // batch that the secondary is unable to obtain the timestamp the primary used.
+ const healthlog = node.getDB('local').system.healthlog;
+ // Regex matching strings that start without "SnapshotTooOld"
+ const regexStringWithoutSnapTooOld = /^((?!^SnapshotTooOld).)*$/;
+ let errs =
+ healthlog.find({"severity": "error", "data.error": regexStringWithoutSnapTooOld});
+ if (errs.hasNext()) {
+ const err = "dbCheck found inconsistency on " + node.host;
+ jsTestLog(err + ". Errors: ");
+ for (let count = 0; errs.hasNext() && count < 20; count++) {
+ jsTestLog(tojson(errs.next()));
+ }
+ assert(false, err);
}
- assert(false, err);
- }
- jsTestLog("Checked health log on " + node.host);
- });
+ jsTestLog("Checked health log on " + node.host);
+ });
- return {ok: 1};
-}
+ return {ok: 1};
+ };
+
+ const onDrop = function(e) {
+ jsTestLog("Skipping dbCheck due to transient error: " + tojson(e));
+ return {ok: 1};
+ };
+
+ return assert.dropExceptionsWithCode(() => {
+ return runBackgroundDbCheck(hosts);
+ }, [ErrorCodes.NamespaceNotFound, ErrorCodes.LockTimeout, ErrorCodes.Interrupted], onDrop);
+};
if (topology.type === Topology.kReplicaSet) {
- let res = runBackgroundDbCheck(topology.nodes);
+ let res = exceptionFilteredBackgroundDbCheck(topology.nodes);
assert.commandWorked(res, () => 'dbCheck replication consistency check failed: ' + tojson(res));
} else if (topology.type === Topology.kShardedCluster) {
const threads = [];
try {
if (topology.configsvr.type === Topology.kReplicaSet) {
- const thread = new Thread(runBackgroundDbCheck, topology.configsvr.nodes);
+ const thread = new Thread(exceptionFilteredBackgroundDbCheck, topology.configsvr.nodes);
threads.push(thread);
thread.start();
}
@@ -133,7 +134,7 @@ if (topology.type === Topology.kReplicaSet) {
for (let shardName of Object.keys(topology.shards)) {
const shard = topology.shards[shardName];
if (shard.type === Topology.kReplicaSet) {
- const thread = new Thread(runBackgroundDbCheck, shard.nodes);
+ const thread = new Thread(exceptionFilteredBackgroundDbCheck, shard.nodes);
threads.push(thread);
thread.start();
} else {
diff --git a/src/mongo/shell/assert.js b/src/mongo/shell/assert.js
index 70afc9d41a5..08b095848b2 100644
--- a/src/mongo/shell/assert.js
+++ b/src/mongo/shell/assert.js
@@ -611,6 +611,28 @@ assert = (function() {
return res;
};
+ assert.dropExceptionsWithCode = function(func, dropCodes, onDrop) {
+ if (typeof func !== "function") {
+ doassert('assert.dropExceptionsWithCode 1st argument must be a function');
+ }
+ if (typeof onDrop !== "function") {
+ doassert('assert.dropExceptionsWithCode 3rd argument must be a function');
+ }
+ if (!Array.isArray(dropCodes)) {
+ dropCodes = [dropCodes];
+ }
+
+ try {
+ return func();
+ } catch (e) {
+ if (dropCodes.some((ec) => e.code === ec)) {
+ return onDrop(e);
+ } else {
+ throw e;
+ }
+ }
+ };
+
assert.throws.automsg = function(func, params) {
if (arguments.length === 1)
params = [];