summaryrefslogtreecommitdiff
path: root/jstests/watchdog
diff options
context:
space:
mode:
authorMark Benvenuto <mark.benvenuto@mongodb.com>2019-05-13 21:16:52 -0400
committerMark Benvenuto <mark.benvenuto@mongodb.com>2019-05-13 21:16:52 -0400
commit39f8c6af5ef1f637bdb2120b1cfb8b507368a7f8 (patch)
tree9f2587793cb215b9a36eb519e0b8d02bd05da317 /jstests/watchdog
parent9ab10de2762ba48532d9bc6717a434672eee8475 (diff)
downloadmongo-39f8c6af5ef1f637bdb2120b1cfb8b507368a7f8.tar.gz
SERVER-41023 Move Storage Node Watchdog to community
Diffstat (limited to 'jstests/watchdog')
-rw-r--r--jstests/watchdog/charybdefs_setup.sh27
-rw-r--r--jstests/watchdog/lib/charybdefs_lib.js127
-rw-r--r--jstests/watchdog/lib/wd_test_common.js54
-rw-r--r--jstests/watchdog/wd_auditpath_hang.js21
-rw-r--r--jstests/watchdog/wd_dbpath_hang.js14
-rw-r--r--jstests/watchdog/wd_journal_hang.js33
-rw-r--r--jstests/watchdog/wd_logpath_hang.js14
-rw-r--r--jstests/watchdog/wd_setparam.js60
8 files changed, 350 insertions, 0 deletions
diff --git a/jstests/watchdog/charybdefs_setup.sh b/jstests/watchdog/charybdefs_setup.sh
new file mode 100644
index 00000000000..31cd1f0be05
--- /dev/null
+++ b/jstests/watchdog/charybdefs_setup.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Script to set up charybdefs
+set -euo pipefail
+IFS=$'\n\t'
+
+if [ "$#" -ne 0 ]; then
+ echo "This script does not take any arguments"
+ exit 1
+fi
+
+echo Start - charybdefs_setup.sh
+
+cd /data
+
+rm -rf /data/charybdefs
+rm -rf /data/thrift
+
+# Use the mongo branch and fork from here
+git clone -b mongo_42 https://github.com/markbenvenuto/charybdefs.git
+
+# Run the build script in the mongo branch
+cd charybdefs/mongo
+
+# Build and setup thrift and charybdefs
+PATH=/opt/mongodbtoolchain/v3/bin:$PATH bash ./build.sh
+
+echo Done - charybdefs_setup.sh
diff --git a/jstests/watchdog/lib/charybdefs_lib.js b/jstests/watchdog/lib/charybdefs_lib.js
new file mode 100644
index 00000000000..f80246426d8
--- /dev/null
+++ b/jstests/watchdog/lib/charybdefs_lib.js
@@ -0,0 +1,127 @@
+// Exit code that the watchdog uses on exit
+const EXIT_WATCHDOG = 61;
+
+/**
+ * Control the Charybdefs file system for fault injection testing
+ *
+ * @param {string} test_name unique name for test directories
+ */
+function CharybdefsControl(test_name) {
+ 'use strict';
+
+ const python = "/opt/mongodbtoolchain/v3/bin/python3";
+ let control_py = "/data/charybdefs/mongo/control.py";
+
+ // Use the minimum watchdog period
+ const wd_period_sec = 60;
+
+ // Since the watchdog can take up to (2 x period) to detect failures, stall the write for that
+ // amount of time plus a small buffer of time to account for thread scheduling, etc.
+ const fs_delay_sec = wd_period_sec * 2 + 5;
+
+ const mount_point = MongoRunner.toRealPath(test_name + '_mnt');
+ const backing_path = MongoRunner.toRealPath(test_name + '_backing');
+
+ this._runControl = function(cmd, ...args) {
+ let cmd_args = [python, control_py, cmd];
+ cmd_args = cmd_args.concat(args);
+ let ret = run.apply(null, cmd_args);
+ assert.eq(ret, 0);
+ };
+
+ /**
+ * Get the path of the mounted Charybdefs file system.
+ *
+ * @return {string} mount point
+ */
+ this.getMountPath = function() {
+ return mount_point;
+ };
+
+ /**
+ * Get the Watchdog Period.
+ *
+ * @return {number} watchdog period in seconds
+ */
+ this.getWatchdogPeriodSeconds = function() {
+ return wd_period_sec;
+ };
+
+ /**
+ * Start the Charybdefs filesystem.
+ */
+ this.start = function() {
+ this.cleanup();
+
+ this._runControl("start",
+ "--fuse_mount=" + mount_point,
+ "--backing_path=" + backing_path,
+ "--log_file=foo_fs.log");
+ print("Charybdefs sucessfully started.");
+ };
+
+ // Get the current check generation
+ function _getGeneration(admin) {
+ const result = admin.runCommand({"serverStatus": 1});
+
+ assert.commandWorked(result);
+
+ return result.watchdog.checkGeneration;
+ }
+
+ /**
+ * Wait for the watchdog to run some checks first.
+ *
+ * @param {object} admin - MongoDB connection to the admin database
+ */
+ this.waitForWatchdogToStart = function(admin) {
+ print("Waiting for MongoDB watchdog to checks run twice.");
+ assert.soon(function() {
+ return _getGeneration(admin) > 2;
+ }, "Watchdog did not start running", 5 * wd_period_sec * 1000);
+ };
+
+ /**
+ * Inject a delay on writes, and wait for MongoDB to hang.
+ *
+ * @param {string} file_name - file name to inject fault on
+ */
+ this.addWriteDelayFaultAndWait = function(file_name) {
+ // Convert seconds to microseconds for charybdefs
+ const delay_us = fs_delay_sec * 1000000;
+ this.addFault("write_buf", file_name, delay_us);
+
+ // Wait for watchdog to stop
+ print("Waiting for MongoDB to hang.");
+ sleep(fs_delay_sec * 1000);
+
+ };
+
+ /**
+ * Add a fault to inject.
+ *
+ * @param {string} method - name of fuse method to inject fault for
+ * @param {string} file_name - file name to inject fault on
+ * @param {number} delay_us - delay in microseconds to inject (always passed as --delay_us)
+ */
+ this.addFault = function(method, file_name, delay_us) {
+
+ this._runControl("set_fault",
+ "--methods=" + method,
+ "--errno=5",
+ "--probability=100000",
+ "--regexp=.*" + file_name,
+ "--delay_us=" + delay_us);
+ };
+
+ /**
+ * Shutdown and clean up the Charybdefs filesystem.
+ */
+ this.cleanup = function() {
+ this._runControl("stop_all", "--fuse_mount=" + mount_point);
+
+ // Delete any remaining files
+ resetDbpath(mount_point);
+ resetDbpath(backing_path);
+ };
+}
diff --git a/jstests/watchdog/lib/wd_test_common.js b/jstests/watchdog/lib/wd_test_common.js
new file mode 100644
index 00000000000..46e625d6c9e
--- /dev/null
+++ b/jstests/watchdog/lib/wd_test_common.js
@@ -0,0 +1,54 @@
+// Storage Node Watchdog common test code
+//
+load("jstests/watchdog/lib/charybdefs_lib.js");
+
+function testMongoDHang(control, mongod_options) {
+ 'use strict';
+
+ // Now start MongoD with it enabled at startup
+ //
+ if (mongod_options.hasOwnProperty("dbPath")) {
+ resetDbpath(mongod_options.dbPath);
+ }
+
+ var options = {
+ setParameter: "watchdogPeriodSeconds=" + control.getWatchdogPeriodSeconds(),
+ verbose: 1,
+ };
+
+ options = Object.extend(mongod_options, options);
+
+ const conn = MongoRunner.runMongod(options);
+ assert.neq(null, conn, 'mongod was unable to start up');
+
+ // Get a handle to the admin database for serverStatus queries
+ const admin = conn.getDB("admin");
+
+ // Wait for the watchdog to run some checks first
+ control.waitForWatchdogToStart(admin);
+
+ // Hang the file system
+ control.addWriteDelayFaultAndWait("watchdog_probe.*");
+
+ // Check MongoD is dead by sending SIGTERM
+ // This will trigger our "nice" shutdown, but since mongod is stuck in the kernel doing I/O,
+ // the process will not terminate until charybdefs is done sleeping.
+ print("Stopping MongoDB now, it will terminate once charybdefs is done sleeping.");
+ MongoRunner.stopMongod(conn, undefined, {allowedExitCode: EXIT_WATCHDOG});
+}
+
+function testFuseAndMongoD(control, mongod_options) {
+ 'use strict';
+
+ // Cleanup previous runs
+ control.cleanup();
+
+ try {
+ // Start the file system
+ control.start();
+
+ testMongoDHang(control, mongod_options);
+ } finally {
+ control.cleanup();
+ }
+}
diff --git a/jstests/watchdog/wd_auditpath_hang.js b/jstests/watchdog/wd_auditpath_hang.js
new file mode 100644
index 00000000000..bd961d55a47
--- /dev/null
+++ b/jstests/watchdog/wd_auditpath_hang.js
@@ -0,0 +1,21 @@
+// Storage Node Watchdog - validate watchdog monitors --auditpath
+//
+load("jstests/watchdog/lib/wd_test_common.js");
+
+(function() {
+ 'use strict';
+
+ if (assert.commandWorked(db.runCommand({buildInfo: 1})).modules.includes("enterprise")) {
+ let control = new CharybdefsControl("auditpath_hang");
+
+ const auditPath = control.getMountPath();
+
+ testFuseAndMongoD(control, {
+
+ auditDestination: 'file',
+ auditFormat: 'JSON',
+ auditPath: auditPath + "/auditLog.json"
+ });
+ }
+
+})();
diff --git a/jstests/watchdog/wd_dbpath_hang.js b/jstests/watchdog/wd_dbpath_hang.js
new file mode 100644
index 00000000000..39147fe2229
--- /dev/null
+++ b/jstests/watchdog/wd_dbpath_hang.js
@@ -0,0 +1,14 @@
+// Storage Node Watchdog - validate watchdog monitors --dbpath
+//
+load("jstests/watchdog/lib/wd_test_common.js");
+
+(function() {
+ 'use strict';
+
+ let control = new CharybdefsControl("dbpath_hang");
+
+ const dbPath = control.getMountPath() + "/db";
+
+ testFuseAndMongoD(control, {dbpath: dbPath});
+
+})();
diff --git a/jstests/watchdog/wd_journal_hang.js b/jstests/watchdog/wd_journal_hang.js
new file mode 100644
index 00000000000..c07b4298170
--- /dev/null
+++ b/jstests/watchdog/wd_journal_hang.js
@@ -0,0 +1,33 @@
+// Storage Node Watchdog - validate watchdog monitors --dbpath /journal
+// @tags: [requires_wiredtiger,requires_journaling]
+//
+load("jstests/watchdog/lib/wd_test_common.js");
+
+(function() {
+ 'use strict';
+
+ function trimTrailingSlash(dir) {
+ if (dir.endsWith('/')) {
+ return dir.substring(0, dir.length - 1);
+ }
+
+ return dir;
+ }
+
+ let control = new CharybdefsControl("journalpath_hang");
+
+ const journalFusePath = control.getMountPath();
+
+ const dbPath = MongoRunner.toRealDir("$dataDir/mongod-journal");
+
+ const journalLinkPath = dbPath + "/journal";
+
+ resetDbpath(dbPath);
+
+ // Create a symlink from the non-fuse journal directory to the fuse mount.
+ const ret = run("ln", "-s", trimTrailingSlash(journalFusePath), journalLinkPath);
+ assert.eq(ret, 0);
+
+ // Set noCleanData so that the dbPath is not cleaned because we want to use the journal symlink.
+ testFuseAndMongoD(control, {dbpath: dbPath, noCleanData: true});
+})();
diff --git a/jstests/watchdog/wd_logpath_hang.js b/jstests/watchdog/wd_logpath_hang.js
new file mode 100644
index 00000000000..9a3ec13c845
--- /dev/null
+++ b/jstests/watchdog/wd_logpath_hang.js
@@ -0,0 +1,14 @@
+// Storage Node Watchdog - validate watchdog monitors --logpath
+//
+load("jstests/watchdog/lib/wd_test_common.js");
+
+(function() {
+ 'use strict';
+
+ let control = new CharybdefsControl("logpath_hang");
+
+ const logpath = control.getMountPath();
+
+ testFuseAndMongoD(control, {logpath: logpath + "/foo.log"});
+
+})();
diff --git a/jstests/watchdog/wd_setparam.js b/jstests/watchdog/wd_setparam.js
new file mode 100644
index 00000000000..0857e11b1ff
--- /dev/null
+++ b/jstests/watchdog/wd_setparam.js
@@ -0,0 +1,60 @@
+// Storage Node Watchdog test cases
+// - Validate set parameter functions correctly.
+(function() {
+ 'use strict';
+ const admin = db.getSiblingDB("admin");
+
+ // Helper: read a single server parameter via getParameter
+ //
+ function getparam(adminDb, field) {
+ let q = {getParameter: 1};
+ q[field] = 1;
+
+ const ret = adminDb.runCommand(q);
+ return ret[field];
+ }
+
+ // Verify the defaults are as we documented them
+ assert.eq(getparam(admin, "watchdogPeriodSeconds"), -1);
+
+ function setparam(adminDb, obj) {
+ const ret = adminDb.runCommand(Object.extend({setParameter: 1}, obj));
+ return ret;
+ }
+
+ // Negative tests
+ // Negative: set it too low.
+ assert.commandFailed(setparam(admin, {"watchdogPeriodSeconds": 1}));
+ // Negative: set it to the min value but fail since it was not enabled.
+ assert.commandFailed(setparam(admin, {"watchdogPeriodSeconds": 60}));
+ // Negative: set it to the min value + 1 but fail since it was not enabled.
+ assert.commandFailed(setparam(admin, {"watchdogPeriodSeconds": 61}));
+
+ // Now test MongoD with it enabled at startup
+ //
+ const conn = MongoRunner.runMongod({setParameter: "watchdogPeriodSeconds=60"});
+ assert.neq(null, conn, 'mongod was unable to start up');
+
+ const admin2 = conn.getDB("admin");
+
+ // Validate the value that was set at startup
+ assert.eq(getparam(admin2, "watchdogPeriodSeconds"), 60);
+
+ // Negative: set it too low.
+ assert.commandFailed(setparam(admin2, {"watchdogPeriodSeconds": 1}));
+ // Positive: set it to the min value
+ assert.commandWorked(setparam(admin2, {"watchdogPeriodSeconds": 60}));
+ // Positive: set it to the min value + 1
+ assert.commandWorked(setparam(admin2, {"watchdogPeriodSeconds": 61}));
+
+ // Positive: disable it
+ assert.commandWorked(setparam(admin2, {"watchdogPeriodSeconds": -1}));
+
+ assert.eq(getparam(admin2, "watchdogPeriodSeconds"), -1);
+
+ // Positive: enable it again
+ assert.commandWorked(setparam(admin2, {"watchdogPeriodSeconds": 60}));
+
+ MongoRunner.stopMongod(conn);
+
+})();