diff options
-rw-r--r-- | buildscripts/resmokelib/cli.py | 5 | ||||
-rw-r--r-- | buildscripts/resmokelib/config.py | 4 | ||||
-rw-r--r-- | buildscripts/resmokelib/configure_resmoke.py | 1 | ||||
-rw-r--r-- | buildscripts/resmokelib/core/process.py | 7 | ||||
-rw-r--r-- | buildscripts/resmokelib/run/__init__.py | 94 | ||||
-rw-r--r-- | buildscripts/resmokelib/testing/fixtures/fixturelib.py | 1 | ||||
-rw-r--r-- | buildscripts/tests/resmokelib/run/test_auto_kill_rogue_process.py | 124 |
7 files changed, 234 insertions, 2 deletions
diff --git a/buildscripts/resmokelib/cli.py b/buildscripts/resmokelib/cli.py index 2cb85721e9e..c3b4582f455 100644 --- a/buildscripts/resmokelib/cli.py +++ b/buildscripts/resmokelib/cli.py @@ -1,7 +1,8 @@ """Command-line entry-point into resmoke.""" import time - +import os +import psutil from buildscripts.resmokelib import parser @@ -13,6 +14,8 @@ def main(argv): :return: None """ __start_time = time.time() + os.environ['RESMOKE_PARENT_PROCESS'] = str(os.getpid()) + os.environ['RESMOKE_PARENT_CTIME'] = str(psutil.Process().create_time()) subcommand = parser.parse_command_line( argv[1:], start_time=__start_time, usage="Resmoke is MongoDB's correctness testing orchestrator.\n" diff --git a/buildscripts/resmokelib/config.py b/buildscripts/resmokelib/config.py index 6c719a1bf2b..d1f1e052312 100644 --- a/buildscripts/resmokelib/config.py +++ b/buildscripts/resmokelib/config.py @@ -50,6 +50,7 @@ DEFAULT_GENNY_EXECUTABLE = os.path.normpath("genny/build/src/driver/genny") # Names below correspond to how they are specified via the command line or in the options YAML file. DEFAULTS = { + "auto_kill": "on", "always_use_log_files": False, "archive_limit_mb": 5000, "archive_limit_tests": 10, @@ -265,6 +266,9 @@ class MultiversionOptions(object): # Variables that are set by the user at the command line or with --options. ## +# Allow resmoke permission to automatically kill existing rogue mongo processes. +AUTO_KILL = "on" + # Log to files located in the db path and don't clean dbpaths after tests. 
ALWAYS_USE_LOG_FILES = False diff --git a/buildscripts/resmokelib/configure_resmoke.py b/buildscripts/resmokelib/configure_resmoke.py index a51a93bd086..af8a0fb09fe 100644 --- a/buildscripts/resmokelib/configure_resmoke.py +++ b/buildscripts/resmokelib/configure_resmoke.py @@ -218,6 +218,7 @@ be invoked as either: _config.ENABLED_FEATURE_FLAGS, all_feature_flags = setup_feature_flags() not_enabled_feature_flags = list(set(all_feature_flags) - set(_config.ENABLED_FEATURE_FLAGS)) + _config.AUTO_KILL = config.pop("auto_kill") _config.ALWAYS_USE_LOG_FILES = config.pop("always_use_log_files") _config.BASE_PORT = int(config.pop("base_port")) _config.BACKUP_ON_RESTART_DIR = config.pop("backup_on_restart_dir") diff --git a/buildscripts/resmokelib/core/process.py b/buildscripts/resmokelib/core/process.py index 1e22aa532a3..f4517aa6cef 100644 --- a/buildscripts/resmokelib/core/process.py +++ b/buildscripts/resmokelib/core/process.py @@ -89,7 +89,14 @@ class Process(object): self.logger = logger self.args = args + self.env = utils.default_if_none(env, os.environ.copy()) + if not self.env.get('RESMOKE_PARENT_PROCESS'): + self.env['RESMOKE_PARENT_PROCESS'] = os.environ.get('RESMOKE_PARENT_PROCESS', + str(os.getpid())) + if not self.env.get('RESMOKE_PARENT_CTIME'): + self.env['RESMOKE_PARENT_CTIME'] = os.environ.get('RESMOKE_PARENT_CTIME', + str(psutil.Process().create_time())) if env_vars is not None: self.env.update(env_vars) diff --git a/buildscripts/resmokelib/run/__init__.py b/buildscripts/resmokelib/run/__init__.py index 85d6fc7a78b..64652279fa6 100644 --- a/buildscripts/resmokelib/run/__init__.py +++ b/buildscripts/resmokelib/run/__init__.py @@ -8,11 +8,13 @@ import os.path import random import shlex import sys +import textwrap import time import shutil import curatorbin import pkg_resources +import psutil from buildscripts.resmokelib import parser as main_parser from buildscripts.resmokelib import config @@ -50,7 +52,6 @@ class TestRunner(Subcommand): # pylint: 
disable=too-many-instance-attributes self._archive = None self._interrupted = False self._exit_code = 0 - runtime_recorder.setup_start_time(start_time) def _setup_logging(self): @@ -206,6 +207,7 @@ class TestRunner(Subcommand): # pylint: disable=too-many-instance-attributes """Run the suite and tests specified.""" self._resmoke_logger.info("verbatim resmoke.py invocation: %s", " ".join([shlex.quote(arg) for arg in sys.argv])) + self._check_for_mongo_processes() if config.EVERGREEN_TASK_DOC: self._resmoke_logger.info("Evergreen task documentation:\n%s", @@ -282,6 +284,88 @@ class TestRunner(Subcommand): # pylint: disable=too-many-instance-attributes with open("local-resmoke-invocation.txt", "w") as fh: fh.write(f"{resmoke_env_options} {local_resmoke_invocation}") + def _check_for_mongo_processes(self): + # pylint: disable=too-many-branches, + """Check for existing mongo processes as they could interfere with running the tests.""" + + if config.AUTO_KILL == 'off' or config.SHELL_CONN_STRING is not None: + return + + rogue_procs = [] + # Iterate over all running process + for proc in psutil.process_iter(): + try: + parent_resmoke_pid = proc.environ().get('RESMOKE_PARENT_PROCESS') + parent_resmoke_ctime = proc.environ().get('RESMOKE_PARENT_CTIME') + if not parent_resmoke_pid: + continue + if psutil.pid_exists(int(parent_resmoke_pid)): + # Double check `parent_resmoke_pid` is really a rooting resmoke process. Having + # the RESMOKE_PARENT_PROCESS environment variable proves it is a process which + # was spawned through resmoke. Only a resmoke process has RESMOKE_PARENT_PROCESS + # as the value of its own PID. + parent_resmoke_proc = psutil.Process(int(parent_resmoke_pid)) + if parent_resmoke_ctime == str(parent_resmoke_proc.create_time()): + continue + + rogue_procs.append(proc) + + except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): + pass + + if rogue_procs: + msg = "detected existing mongo processes. 
Please clean up these processes as they may affect tests:" + + if config.AUTO_KILL == 'on': + msg += textwrap.dedent("""\ + + Congratulations, you have selected auto kill mode: + HASTA LA VISTA MONGO""" + r""" + ______ + <((((((\\\ + / . }\ + ;--..--._|} + (\ '--/\--' ) + \\ | '-' :'| + \\ . -==- .-| + \\ \.__.' \--._ + [\\ __.--| // _/'--. + \ \\ .'-._ ('-----'/ __/ \\ + \ \\ / __>| | '--. | + \ \\ | \ | / / / + \ '\ / \ | | _/ / + \ \ \ | | / / + \ \ \ / + """) + print(f"WARNING: {msg}") + else: + self._resmoke_logger.error("ERROR: %s", msg) + + for proc in rogue_procs: + if config.AUTO_KILL == 'on': + proc_msg = f" Target acquired: pid: {str(proc.pid).ljust(5)} name: {proc.exe()}" + try: + proc.kill() + except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess) as exc: + proc_msg += f" - target escaped: {type(exc).__name__ }" + else: + proc_msg += " - target destroyed\n" + print(proc_msg) + + else: + self._resmoke_logger.error(" pid: %s name: %s", + str(proc.pid).ljust(5), proc.exe()) + + if config.AUTO_KILL == 'on': + print("I'll be back...\n") + else: + raise errors.ResmokeError( + textwrap.dedent("""\ + Failing because existing mongo processes detected. + You can use --autoKillResmokeMongo=on to automatically kill the processes, + or --autoKillResmokeMongo=off to ignore them. + """)) + def _log_resmoke_summary(self, suites): """Log a summary of the resmoke run.""" time_taken = time.time() - self.__start_time @@ -567,6 +651,14 @@ class RunPlugin(PluginInterface): " positional arguments, they will be run using the suites'" " configurations.")) + parser.add_argument( + "--autoKillResmokeMongo", dest="auto_kill", choices=['on', 'error', + 'off'], default='on', + help=("When resmoke starts up, existing mongo processes created from resmoke " + " could cause issues when running tests. 
This option causes resmoke to kill" + " the existing processes and continue running the test, or if 'error' option" + " is used, prints the offending processes and fails the test.")) + parser.add_argument("--installDir", dest="install_dir", metavar="INSTALL_DIR", help="Directory to search for MongoDB binaries") diff --git a/buildscripts/resmokelib/testing/fixtures/fixturelib.py b/buildscripts/resmokelib/testing/fixtures/fixturelib.py index 76669e25866..78bda3c8c68 100644 --- a/buildscripts/resmokelib/testing/fixtures/fixturelib.py +++ b/buildscripts/resmokelib/testing/fixtures/fixturelib.py @@ -124,6 +124,7 @@ class _FixtureConfig(object): # pylint: disable=too-many-instance-attributes self.DEFAULT_MONGOD_EXECUTABLE = config.DEFAULT_MONGOD_EXECUTABLE self.MONGOD_SET_PARAMETERS = config.MONGOD_SET_PARAMETERS self.FIXTURE_SUBDIR = config.FIXTURE_SUBDIR + self.AUTO_KILL = config.AUTO_KILL self.ALWAYS_USE_LOG_FILES = config.ALWAYS_USE_LOG_FILES self.LAST_LTS_MONGOD_BINARY = LAST_LTS_MONGOD_BINARY self.LAST_LTS_MONGOS_BINARY = LAST_LTS_MONGOS_BINARY diff --git a/buildscripts/tests/resmokelib/run/test_auto_kill_rogue_process.py b/buildscripts/tests/resmokelib/run/test_auto_kill_rogue_process.py new file mode 100644 index 00000000000..402adda4afd --- /dev/null +++ b/buildscripts/tests/resmokelib/run/test_auto_kill_rogue_process.py @@ -0,0 +1,124 @@ +"""Unit tests for the rogue process check in buildscripts/resmokelib/run/__init__.py.""" +# pylint: disable=missing-docstring,protected-access +import unittest +import logging +import os +import sys + +import psutil + +from buildscripts.resmokelib.run import TestRunner +from buildscripts.resmokelib import errors +from buildscripts.resmokelib.core import process +from buildscripts.resmokelib.testing.fixtures import interface as fixture_interface +import buildscripts.resmokelib.config + + +class MockTestRunner(TestRunner): + def _setup_logging(self): + self._exec_logger = logging.getLogger() + self._exec_logger.addHandler(logging.NullHandler()) + 
self._resmoke_logger = self._exec_logger + + +class TestDetectRogueProcess(unittest.TestCase): + def setUp(self) -> None: + self.command = [sys.executable, '-c', "import time; time.sleep(5)"] + if sys.platform.lower() == 'win32': + self.sigkill_return = fixture_interface.TeardownMode.TERMINATE.value + else: + self.sigkill_return = -fixture_interface.TeardownMode.KILL.value + + if not os.environ.get('RESMOKE_PARENT_PROCESS'): + os.environ['RESMOKE_PARENT_PROCESS'] = str(os.getpid()) + os.environ['RESMOKE_PARENT_CTIME'] = str(psutil.Process().create_time()) + + def test_warn(self): + buildscripts.resmokelib.config.AUTO_KILL = 'warn' + buildscripts.resmokelib.config.SHELL_CONN_STRING = None + + test_runner = MockTestRunner("test") + test_runner._setup_logging() + + try: + test_runner._check_for_mongo_processes() + except errors.ResmokeError: + self.fail("Detected processes when there should be none.") + + tmp_ctime = os.environ['RESMOKE_PARENT_CTIME'] + os.environ['RESMOKE_PARENT_CTIME'] = str("rogue_process") + proc = process.Process(logging.getLogger(), self.command) + proc.start() + os.environ['RESMOKE_PARENT_CTIME'] = tmp_ctime + + with self.assertRaises(errors.ResmokeError): + test_runner._check_for_mongo_processes() + + proc.stop(mode=fixture_interface.TeardownMode.KILL) + proc.wait() + + def test_on(self): + + buildscripts.resmokelib.config.AUTO_KILL = 'on' + buildscripts.resmokelib.config.SHELL_CONN_STRING = None + + test_runner = MockTestRunner("test") + test_runner._setup_logging() + + test_runner._check_for_mongo_processes() + + tmp_ctime = os.environ['RESMOKE_PARENT_CTIME'] + os.environ['RESMOKE_PARENT_CTIME'] = str("rogue_process") + proc = process.Process(logging.getLogger(), self.command) + proc.start() + os.environ['RESMOKE_PARENT_CTIME'] = tmp_ctime + + test_runner._check_for_mongo_processes() + + proc.wait() + + if proc._process.returncode != self.sigkill_return: + self.fail( + f"Detected processes was not killed by resmoke, exit code was 
{proc._process.returncode}, expected {self.sigkill_return}" + ) + + def test_off(self): + buildscripts.resmokelib.config.AUTO_KILL = 'off' + buildscripts.resmokelib.config.SHELL_CONN_STRING = None + + test_runner = MockTestRunner("test") + test_runner._setup_logging() + + test_runner._check_for_mongo_processes() + + proc = process.Process(logging.getLogger(), self.command) + proc.start() + + test_runner._check_for_mongo_processes() + + proc.stop(mode=fixture_interface.TeardownMode.ABORT) + + if proc._process.returncode == self.sigkill_return: + self.fail("Process was killed when it should not have been.") + proc.wait() + + def test_shell_constring(self): + buildscripts.resmokelib.config.AUTO_KILL = 'warn' + buildscripts.resmokelib.config.SHELL_CONN_STRING = '127.0.0.1:27000' + + test_runner = MockTestRunner("test") + test_runner._setup_logging() + + test_runner._check_for_mongo_processes() + + proc = process.Process(logging.getLogger(), self.command) + proc.start() + + test_runner._check_for_mongo_processes() + + proc.stop(mode=fixture_interface.TeardownMode.ABORT) + + if proc._process.returncode == self.sigkill_return: + self.fail("Process was killed when it should not have been.") + + proc.wait() |