Diffstat (limited to 'jstests/hooks/run_dbcheck_background.js')
-rw-r--r--  jstests/hooks/run_dbcheck_background.js | 162
1 file changed, 162 insertions, 0 deletions
diff --git a/jstests/hooks/run_dbcheck_background.js b/jstests/hooks/run_dbcheck_background.js
new file mode 100644
index 00000000000..4abfb414f7c
--- /dev/null
+++ b/jstests/hooks/run_dbcheck_background.js
@@ -0,0 +1,162 @@
+/**
+ * Runs dbCheck in background.
+ */
+'use strict';
+
+(function() {
+load('jstests/libs/discover_topology.js'); // For Topology and DiscoverTopology.
+load('jstests/libs/parallelTester.js'); // For Thread.
+
+if (typeof db === 'undefined') {
+ throw new Error(
+ "Expected mongo shell to be connected a server, but global 'db' object isn't defined");
+}
+
+TestData = TestData || {};
+
+const conn = db.getMongo();
+const topology = DiscoverTopology.findConnectedNodes(conn);
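+// For orientation: the topology document returned by discover_topology.js has,
+// roughly, one of these shapes (a sketch, not an exhaustive schema):
+//     {type: Topology.kReplicaSet, primary: "host:port", nodes: ["host:port", ...]}
+//     {type: Topology.kShardedCluster, configsvr: {...}, shards: {<shardName>: {...}}, ...}
+// The dispatch at the bottom of this file keys off the "type" and "nodes" fields.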
+
+function runBackgroundDbCheck(hosts) {
+ const quietly = (func) => {
+ const printOriginal = print;
+ try {
+ print = Function.prototype;
+ func();
+ } finally {
+ print = printOriginal;
+ }
+ };
+
+ let rst;
+ // We construct the ReplSetTest instance with the print() function overridden to be a
+ // no-op in order to suppress the log messages about the replica set configuration. The
+ // run_dbcheck_background.js hook is executed frequently by resmoke.py and would
+ // otherwise generate an overwhelming number of log messages.
+ quietly(() => {
+ rst = new ReplSetTest(hosts[0]);
+ });
+
+ const dbNames = new Set();
+ const primary = rst.getPrimary();
+
+ const version =
+ assert
+ .commandWorked(primary.adminCommand({getParameter: 1, featureCompatibilityVersion: 1}))
+ .featureCompatibilityVersion.version;
+ if (version != latestFCV) {
+ print("Not running dbCheck in FCV " + version);
+ return {ok: 1};
+ }
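+ // For reference, the getParameter response above has (roughly) the shape
+ //     {featureCompatibilityVersion: {version: "4.4"}, ok: 1}
+ // where "4.4" is illustrative; latestFCV is a global constant provided by the mongo shell.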
+
+ print("Running dbCheck for: " + rst.getURL());
+
+ const adminDb = primary.getDB('admin');
+ let res = assert.commandWorked(adminDb.runCommand({listDatabases: 1, nameOnly: true}));
+ for (let dbInfo of res.databases) {
+ dbNames.add(dbInfo.name);
+ }
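+ // For reference, with {nameOnly: true} the listDatabases response has (roughly) the
+ // shape {databases: [{name: "admin"}, {name: "test"}, ...], ok: 1}; the database names
+ // here are illustrative.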
+
+ // Skip the "local" database, which is not replicated, and the "config" database, whose
+ // config.transactions collection legitimately differs between primaries and secondaries,
+ // so dbCheck is not run against either of them.
+ dbNames.delete('config');
+ dbNames.delete('local');
+
+ dbNames.forEach((dbName) => {
+ try {
+ assert.commandWorked(primary.getDB(dbName).runCommand({dbCheck: 1}));
+ jsTestLog("dbCheck done on database " + dbName);
+ } catch (e) {
+ if (e.code === ErrorCodes.NamespaceNotFound || e.code === ErrorCodes.LockTimeout) {
+ jsTestLog("Skipping dbCheck on database " + dbName +
+ " due to transient error: " + tojson(e));
+ return;
+ } else {
+ throw e;
+ }
+ }
+
+ // dbCheck is complete once no operation with desc "dbCheck" remains in currentOp.
+ const dbCheckCompleted = (adminDb) => {
+ return adminDb.currentOp().inprog.filter(x => x["desc"] === "dbCheck")[0] === undefined;
+ };
+
+ assert.soon(() => dbCheckCompleted(adminDb),
+ "timed out waiting for dbCheck to finish on database: " + dbName);
+ });
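+
+ // Illustrative only (not executed by this hook): dbCheck can also be pointed at a single
+ // collection, optionally with throttling parameters. The parameter names below follow
+ // dbcheck.idl and may vary by server version; "someColl" is a hypothetical collection name.
+ //
+ //     assert.commandWorked(primary.getDB(dbName).runCommand(
+ //         {dbCheck: "someColl", maxCountPerSecond: 1000}));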
+
+ // Wait for all secondaries to finish applying all dbcheck batches.
+ rst.awaitReplication();
+
+ const nodes = [
+ rst.getPrimary(),
+ ...rst.getSecondaries().filter(conn => {
+ return !conn.adminCommand({isMaster: 1}).arbiterOnly;
+ })
+ ];
+ nodes.forEach((node) => {
+ // Assert that dbCheck logged no errors (i.e., found no inconsistencies); warnings are allowed.
+ const healthlog = node.getDB('local').system.healthlog;
+ let errs = healthlog.find({"severity": "error"});
+ if (errs.hasNext()) {
+ const err = "dbCheck found inconsistency on " + node.host;
+ jsTestLog(err + ". Errors: ");
+ for (let count = 0; errs.hasNext() && count < 20; count++) {
+ jsTestLog(tojson(errs.next()));
+ }
+ assert(false, err);
+ }
+ jsTestLog("Checked health log on " + node.host);
+ });
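+
+ // For orientation, an error entry in local.system.healthlog looks roughly like
+ //     {severity: "error", operation: "dbCheckBatch", namespace: "test.coll",
+ //      msg: "...", data: {...}, timestamp: ISODate("...")}
+ // The field names here are a sketch; the exact schema is defined by the server.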
+
+ return {ok: 1};
+}
+
+if (topology.type === Topology.kReplicaSet) {
+ let res = runBackgroundDbCheck(topology.nodes);
+ assert.commandWorked(res, () => 'dbCheck replication consistency check failed: ' + tojson(res));
+} else if (topology.type === Topology.kShardedCluster) {
+ const threads = [];
+ try {
+ if (topology.configsvr.type === Topology.kReplicaSet) {
+ const thread = new Thread(runBackgroundDbCheck, topology.configsvr.nodes);
+ threads.push(thread);
+ thread.start();
+ }
+
+ for (let shardName of Object.keys(topology.shards)) {
+ const shard = topology.shards[shardName];
+ if (shard.type === Topology.kReplicaSet) {
+ const thread = new Thread(runBackgroundDbCheck, shard.nodes);
+ threads.push(thread);
+ thread.start();
+ } else {
+ throw new Error('Unrecognized topology format: ' + tojson(topology));
+ }
+ }
+ } finally {
+ // Wait for each thread to finish. Throw an error if any thread fails.
+ let exception;
+ const returnData = threads.map(thread => {
+ try {
+ thread.join();
+ return thread.returnData();
+ } catch (e) {
+ if (!exception) {
+ exception = e;
+ }
+ }
+ });
+ if (exception) {
+ throw exception;
+ }
+
+ returnData.forEach(res => {
+ assert.commandWorked(
+ res, () => 'dbCheck replication consistency check failed: ' + tojson(res));
+ });
+ }
+} else {
+ throw new Error('Unsupported topology configuration: ' + tojson(topology));
+}
+})();