diff options
author | vrachev <vlad.rachev@mongodb.com> | 2020-05-18 09:35:23 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-06-02 14:46:56 +0000 |
commit | 9fcca8acb9a8995e007b5c4c06e5349a57e274e6 (patch) | |
tree | 55448d1a3a46b6ecb144096ce0c0f0fb6e6eadad | |
parent | 255e1caea68aa828cd4bbf45d9a1dfca43d04d5f (diff) | |
download | mongo-9fcca8acb9a8995e007b5c4c06e5349a57e274e6.tar.gz |
SERVER-46882 Reuse debugger process for processes of same type in hang_analyzer.py
6 files changed, 447 insertions, 176 deletions
diff --git a/buildscripts/resmokelib/hang_analyzer/dumper.py b/buildscripts/resmokelib/hang_analyzer/dumper.py index 31a3a2d78f9..a448bc7451a 100644 --- a/buildscripts/resmokelib/hang_analyzer/dumper.py +++ b/buildscripts/resmokelib/hang_analyzer/dumper.py @@ -1,5 +1,7 @@ """Tools to dump debug info for each OS.""" +from abc import ABCMeta, abstractmethod +import logging import os import sys import tempfile @@ -8,54 +10,101 @@ from distutils import spawn # pylint: disable=no-name-in-module from collections import namedtuple from buildscripts.resmokelib.hang_analyzer.process import call, callo, find_program +from buildscripts.resmokelib.hang_analyzer.process_list import Pinfo Dumpers = namedtuple('Dumpers', ['dbg', 'jstack']) -def get_dumpers(): - """Return OS-appropriate dumpers.""" +def get_dumpers(root_logger: logging.Logger, dbg_output: str): + """ + Return OS-appropriate dumpers. + + :param root_logger: Top-level logger + :param dbg_output: 'stdout' or 'file' + """ dbg = None jstack = None if sys.platform.startswith("linux"): - dbg = GDBDumper() + dbg = GDBDumper(root_logger, dbg_output) jstack = JstackDumper() elif sys.platform == "win32" or sys.platform == "cygwin": - dbg = WindowsDumper() + dbg = WindowsDumper(root_logger, dbg_output) jstack = JstackWindowsDumper() elif sys.platform == "darwin": - dbg = LLDBDumper() + dbg = LLDBDumper(root_logger, dbg_output) jstack = JstackDumper() return Dumpers(dbg=dbg, jstack=jstack) -class Dumper(object): - """Abstract base class for OS-specific dumpers.""" +class Dumper(metaclass=ABCMeta): + """ + Abstract base class for OS-specific dumpers. + + :param dbg_output: 'stdout' or 'file' + :param root_logger: Top-level logger + """ + def __init__(self, root_logger: logging.Logger, dbg_output: str): + """Initialize dumper.""" + self._root_logger = root_logger + self._dbg_output = dbg_output + + @abstractmethod def dump_info( # pylint: disable=too-many-arguments,too-many-locals - self, root_logger, logger, pinfo, take_dump): + self, + pinfo: Pinfo, + take_dump: bool, + ): """ Perform dump for a process. - :param root_logger: Top-level logger - :param logger: Logger to output dump info to :param pinfo: A Pinfo describing the process :param take_dump: Whether to take a core dump """ raise NotImplementedError("dump_info must be implemented in OS-specific subclasses") - @staticmethod - def get_dump_ext(): + @abstractmethod + def get_dump_ext(self): """Return the dump file extension.""" raise NotImplementedError("get_dump_ext must be implemented in OS-specific subclasses") + @abstractmethod + def _find_debugger(self, debugger): + """ + Find the installed debugger. + + :param debugger: debugger executable. + """ + raise NotImplementedError("dump_info must be implemented in OS-specific subclasses") + + @abstractmethod + def _prefix(self): + """Return the commands to set up a debugger process.""" + raise NotImplementedError("dump_info must be implemented in OS-specific subclasses") + + @abstractmethod + def _process_specific(self, pinfo: Pinfo, take_dump: bool, logger: logging.Logger = None): + """ + Return the commands that attach to each process, dump info and detach. + + :param pinfo: A Pinfo describing the process + :param take_dump: Whether to take a core dump + :param logger: Logger to output dump info to + """ + raise NotImplementedError("dump_info must be implemented in OS-specific subclasses") + + @abstractmethod + def _postfix(self): + """Return the commands to exit the debugger.""" + raise NotImplementedError("dump_info must be implemented in OS-specific subclasses") + class WindowsDumper(Dumper): """WindowsDumper class.""" - @staticmethod - def __find_debugger(logger, debugger): + def _find_debugger(self, debugger): """Find the installed debugger.""" # We are looking for c:\Program Files (x86)\Windows Kits\8.1\Debuggers\x64 cdb = spawn.find_executable(debugger) @@ -74,54 +123,80 @@ class WindowsDumper(Dumper): os.path.join(root_dir, "Windows Kits", "8." + str(idx), "Debuggers", "x64")) for dbg_path in debugger_paths: - logger.info("Checking for debugger in %s", dbg_path) + self._root_logger.info("Checking for debugger in %s", dbg_path) if os.path.exists(dbg_path): return os.path.join(dbg_path, debugger) return None - def dump_info( # pylint: disable=too-many-arguments - self, root_logger, logger, pinfo, take_dump): - """Dump useful information to the console.""" - debugger = "cdb.exe" - dbg = self.__find_debugger(root_logger, debugger) + def _prefix(self): + """Return the commands to set up a debugger process.""" + cmds = [ + ".symfix", # Fixup symbol path + "!sym noisy", # Enable noisy symbol loading + ".symopt +0x10", # Enable line loading (off by default in CDB, on by default in WinDBG) + ".reload", # Reload symbols + ] - if dbg is None: - root_logger.warning("Debugger %s not found, skipping dumping of %d", debugger, - pinfo.pid) - return + return cmds - root_logger.info("Debugger %s, analyzing %s process with PID %d", dbg, pinfo.name, - pinfo.pid) + def _process_specific(self, pinfo, take_dump, logger=None): + """Return the commands that attach to each process, dump info and detach.""" + assert isinstance(pinfo.pidv, int) dump_command = "" if take_dump: # Dump to file, dump_<process name>.<pid>.mdmp - dump_file = "dump_%s.%d.%s" % (os.path.splitext(pinfo.name)[0], pinfo.pid, + dump_file = "dump_%s.%d.%s" % (os.path.splitext(pinfo.name)[0], pinfo.pidv, self.get_dump_ext()) dump_command = ".dump /ma %s" % dump_file - root_logger.info("Dumping core to %s", dump_file) + self._root_logger.info("Dumping core to %s", dump_file) cmds = [ - ".symfix", # Fixup symbol path - "!sym noisy", # Enable noisy symbol loading - ".symopt +0x10", # Enable line loading (off by default in CDB, on by default in WinDBG) - ".reload", # Reload symbols "!peb", # Dump current exe, & environment variables "lm", # Dump loaded modules dump_command, "!uniqstack -pn", # Dump All unique Threads with function arguments "!cs -l", # Dump all locked critical sections ".detach", # Detach + ] + + return cmds + + def _postfix(self): + """Return the commands to exit the debugger.""" + cmds = [ "q" # Quit ] - call([dbg, '-c', ";".join(cmds), '-p', str(pinfo.pid)], logger) + return cmds - root_logger.info("Done analyzing %s process with PID %d", pinfo.name, pinfo.pid) + def dump_info( # pylint: disable=too-many-arguments + self, pinfo, take_dump): + """Dump useful information to the console.""" + debugger = "cdb.exe" + dbg = self._find_debugger(debugger) - @staticmethod - def get_dump_ext(): + if dbg is None: + self._root_logger.warning("Debugger %s not found, skipping dumping of %s", debugger, + str(pinfo.pidv)) + return + + self._root_logger.info("Debugger %s, analyzing %s processes with PIDs %s", dbg, pinfo.name, + str(pinfo.pidv)) + + # TODO: SERVER-48449 + for pid in pinfo.pidv: + logger = _get_process_logger(self._dbg_output, pinfo.name, pid=pid) + + process = Pinfo(name=pinfo.name, pidv=pid) + cmds = self._prefix() + self._process_specific(process, take_dump) + self._postfix() + + call([dbg, '-c', ";".join(cmds), '-p', str(pid)], logger) + + self._root_logger.info("Done analyzing %s process with PID %d", pinfo.name, pid) + + def get_dump_ext(self): """Return the dump file extension.""" return "mdmp" @@ -131,23 +206,56 @@ class LLDBDumper(Dumper): """LLDBDumper class.""" @staticmethod - def __find_debugger(debugger): + def _find_debugger(debugger): """Find the installed debugger.""" return find_program(debugger, ['/usr/bin']) - def dump_info( # pylint: disable=too-many-arguments,too-many-locals - self, root_logger, logger, pinfo, take_dump): + def _prefix(self): + pass + + def _process_specific(self, pinfo, take_dump, logger=None): + """Return the commands that attach to each process, dump info and detach.""" + cmds = [] + for pid in pinfo.pidv: + dump_command = "" + if take_dump: + # Dump to file, dump_<process name>.<pid>.core + dump_file = "dump_%s.%d.%s" % (pinfo.name, pid, self.get_dump_ext()) + dump_command = "process save-core %s" % dump_file + self._root_logger.info("Dumping core to %s", dump_file) + + cmds += [ + "attach -p %d" % pid, + "target modules list", + "thread backtrace all", + dump_command, + "process detach", + ] + + return cmds + + def _postfix(self): + """Return the commands to exit the debugger.""" + cmds = [ + "settings set interpreter.prompt-on-quit false", + "quit", + ] + + return cmds + + def dump_info(self, pinfo, take_dump): """Dump info.""" debugger = "lldb" - dbg = self.__find_debugger(debugger) + dbg = self._find_debugger(debugger) + logger = _get_process_logger(self._dbg_output, pinfo.name) if dbg is None: - root_logger.warning("Debugger %s not found, skipping dumping of %d", debugger, - pinfo.pid) + self._root_logger.warning("Debugger %s not found, skipping dumping of %s", debugger, + str(pinfo.pidv)) return - root_logger.info("Debugger %s, analyzing %s process with PID %d", dbg, pinfo.name, - pinfo.pid) + self._root_logger.info("Debugger %s, analyzing %s processes with PIDs %s", dbg, pinfo.name, + str(pinfo.pidv)) lldb_version = callo([dbg, "--version"], logger) @@ -167,21 +275,7 @@ class LLDBDumper(Dumper): logger.warning("Debugger lldb is too old, please upgrade to XCode 7.2") return - dump_command = "" - if take_dump: - # Dump to file, dump_<process name>.<pid>.core - dump_file = "dump_%s.%d.%s" % (pinfo.name, pinfo.pid, self.get_dump_ext()) - dump_command = "process save-core %s" % dump_file - root_logger.info("Dumping core to %s", dump_file) - - cmds = [ - "attach -p %d" % pinfo.pid, - "target modules list", - "thread backtrace all", - dump_command, - "settings set interpreter.prompt-on-quit false", - "quit", - ] + cmds = self._process_specific(pinfo, take_dump) + self._postfix() tf = tempfile.NamedTemporaryFile(mode='w', encoding='utf-8') @@ -195,10 +289,10 @@ class LLDBDumper(Dumper): call(['cat', tf.name], logger) call([dbg, '--source', tf.name], logger) - root_logger.info("Done analyzing %s process with PID %d", pinfo.name, pinfo.pid) + self._root_logger.info("Done analyzing %s processes with PIDs %s", pinfo.name, + str(pinfo.pidv)) - @staticmethod - def get_dump_ext(): + def get_dump_ext(self): """Return the dump file extension.""" return "core" @@ -207,35 +301,13 @@ class LLDBDumper(Dumper): class GDBDumper(Dumper): """GDBDumper class.""" - @staticmethod - def __find_debugger(debugger): + def _find_debugger(self, debugger): """Find the installed debugger.""" return find_program(debugger, ['/opt/mongodbtoolchain/gdb/bin', '/usr/bin']) - def dump_info( # pylint: disable=too-many-arguments,too-many-locals - self, root_logger, logger, pinfo, take_dump): - """Dump info.""" - debugger = "gdb" - dbg = self.__find_debugger(debugger) - - if dbg is None: - logger.warning("Debugger %s not found, skipping dumping of %d", debugger, pinfo.pid) - return - - root_logger.info("Debugger %s, analyzing %s process with PID %d", dbg, pinfo.name, - pinfo.pid) - - dump_command = "" - if take_dump: - # Dump to file, dump_<process name>.<pid>.core - dump_file = "dump_%s.%d.%s" % (pinfo.name, pinfo.pid, self.get_dump_ext()) - dump_command = "gcore %s" % dump_file - root_logger.info("Dumping core to %s", dump_file) - - call([dbg, "--version"], logger) - + def _prefix(self): + """Return the commands to set up a debugger process.""" script_dir = "buildscripts" - root_logger.info("dir %s", script_dir) gdb_dir = os.path.join(script_dir, "gdb") mongo_script = os.path.join(gdb_dir, "mongo.py") mongo_printers_script = os.path.join(gdb_dir, "mongo_printers.py") @@ -244,11 +316,23 @@ class GDBDumper(Dumper): source_mongo = "source %s" % mongo_script source_mongo_printers = "source %s" % mongo_printers_script source_mongo_lock = "source %s" % mongo_lock_script + + cmds = [ + "set interactive-mode off", + "set print thread-events off", # Suppress GDB messages of threads starting/finishing. + "set python print-stack full", + source_mongo, + source_mongo_printers, + source_mongo_lock, + ] + return cmds + + def _process_specific( # pylint: disable=too-many-locals + self, pinfo, take_dump, logger=None): + """Return the commands that attach to each process, dump info and detach.""" mongodb_dump_locks = "mongodb-dump-locks" mongodb_show_locks = "mongodb-show-locks" mongodb_uniqstack = "mongodb-uniqstack mongodb-bt-if-active" - mongodb_waitsfor_graph = "mongodb-waitsfor-graph debugger_waitsfor_%s_%d.gv" % \ - (pinfo.name, pinfo.pid) mongodb_javascript_stack = "mongodb-javascript-stack" mongod_dump_sessions = "mongod-dump-sessions" mongodb_dump_mutexes = "mongodb-dump-mutexes" @@ -269,39 +353,69 @@ class GDBDumper(Dumper): 'set logging off', ] - cmds = [ - "set interactive-mode off", - "set print thread-events off", # Suppress GDB messages of threads starting/finishing. - "attach %d" % pinfo.pid, - "info sharedlibrary", - "info threads", # Dump a simple list of commands to get the thread name - "set python print-stack full", - ] + raw_stacks_commands + [ - source_mongo, - source_mongo_printers, - source_mongo_lock, - mongodb_uniqstack, - # Lock the scheduler, before running commands, which execute code in the attached process. - "set scheduler-locking on", - dump_command, - mongodb_dump_locks, - mongodb_show_locks, - mongodb_waitsfor_graph, - mongodb_javascript_stack, - mongod_dump_sessions, - mongodb_dump_mutexes, - mongodb_dump_recovery_units, - "set confirm off", - "quit", - ] + cmds = [] + for pid in pinfo.pidv: + dump_command = "" + if take_dump: + # Dump to file, dump_<process name>.<pid>.core + dump_file = "dump_%s.%d.%s" % (pinfo.name, pid, self.get_dump_ext()) + dump_command = "gcore %s" % dump_file + self._root_logger.info("Dumping core to %s", dump_file) + + mongodb_waitsfor_graph = "mongodb-waitsfor-graph debugger_waitsfor_%s_%d.gv" % \ + (pinfo.name, pid) + + cmds += [ + "attach %d" % pid, + "info sharedlibrary", + "info threads", # Dump a simple list of commands to get the thread name + ] + raw_stacks_commands + [ + mongodb_uniqstack, + # Lock the scheduler, before running commands, which execute code in the attached process. + "set scheduler-locking on", + dump_command, + mongodb_dump_locks, + mongodb_show_locks, + mongodb_waitsfor_graph, + mongodb_javascript_stack, + mongod_dump_sessions, + mongodb_dump_mutexes, + mongodb_dump_recovery_units, + "detach", + ] + + return cmds + + def _postfix(self): + """Return the commands to exit the debugger.""" + cmds = ["set confirm off", "quit"] + return cmds + + def dump_info(self, pinfo, take_dump): + """Dump info.""" + debugger = "gdb" + dbg = self._find_debugger(debugger) + logger = _get_process_logger(self._dbg_output, pinfo.name) + + if dbg is None: + self._root_logger.warning("Debugger %s not found, skipping dumping of %s", debugger, + str(pinfo.pidv)) + return + + self._root_logger.info("Debugger %s, analyzing %s processes with PIDs %s", dbg, pinfo.name, + str(pinfo.pidv)) + + call([dbg, "--version"], logger) + + cmds = self._prefix() + self._process_specific(pinfo, take_dump, logger) + self._postfix() call([dbg, "--quiet", "--nx"] + list( itertools.chain.from_iterable([['-ex', b] for b in cmds])), logger) - root_logger.info("Done analyzing %s process with PID %d", pinfo.name, pinfo.pid) + self._root_logger.info("Done analyzing %s processes with PIDs %s", pinfo.name, + str(pinfo.pidv)) - @staticmethod - def get_dump_ext(): + def get_dump_ext(self): """Return the dump file extension.""" return "core" @@ -320,14 +434,15 @@ class JstackDumper(object): """JstackDumper class.""" @staticmethod - def __find_debugger(debugger): + def _find_debugger(debugger): """Find the installed jstack debugger.""" return find_program(debugger, ['/usr/bin']) - def dump_info(self, root_logger, logger, pid, process_name): + def dump_info(self, root_logger, dbg_output, pid, process_name): """Dump java thread stack traces to the console.""" debugger = "jstack" - jstack = self.__find_debugger(debugger) + jstack = self._find_debugger(debugger) + logger = _get_process_logger(dbg_output, process_name, pid=pid) if jstack is None: logger.warning("Debugger %s not found, skipping dumping of %d", debugger, pid) @@ -349,3 +464,26 @@ class JstackWindowsDumper(object): """Dump java thread stack traces to the logger.""" root_logger.warning("Debugger jstack not supported, skipping dumping of %d", pid) + + +def _get_process_logger(dbg_output, pname: str, pid: int = None): + """Return the process logger from options specified.""" + process_logger = logging.Logger("process", level=logging.DEBUG) + process_logger.mongo_process_filename = None + + if 'stdout' in dbg_output: + s_handler = logging.StreamHandler(sys.stdout) + s_handler.setFormatter(logging.Formatter(fmt="%(message)s")) + process_logger.addHandler(s_handler) + + if 'file' in dbg_output: + if pid: + filename = "debugger_%s_%s.log" % (os.path.splitext(pname)[0], pid) + else: + filename = "debugger_%s.log" % (os.path.splitext(pname)[0]) + process_logger.mongo_process_filename = filename + f_handler = logging.FileHandler(filename=filename, mode="w") + f_handler.setFormatter(logging.Formatter(fmt="%(message)s")) + process_logger.addHandler(f_handler) + + return process_logger diff --git a/buildscripts/resmokelib/hang_analyzer/hang_analyzer.py b/buildscripts/resmokelib/hang_analyzer/hang_analyzer.py index 73c6be95a32..ebc5848a478 100755 --- a/buildscripts/resmokelib/hang_analyzer/hang_analyzer.py +++ b/buildscripts/resmokelib/hang_analyzer/hang_analyzer.py @@ -56,7 +56,7 @@ class HangAnalyzer(Subcommand): self._log_system_info() extractor.extract_debug_symbols(self.root_logger) - dumpers = dumper.get_dumpers() + dumpers = dumper.get_dumpers(self.root_logger, self.options.debugger_output) processes = process_list.get_processes(self.process_ids, self.interesting_processes, self.options.process_match, self.root_logger) @@ -66,16 +66,16 @@ class HangAnalyzer(Subcommand): # Dump python processes by signalling them. The resmoke.py process will generate # the report.json, when signalled, so we do this before attaching to other processes. for pinfo in [pinfo for pinfo in processes if pinfo.name.startswith("python")]: - signal_python(self.root_logger, pinfo) + for pid in pinfo.pidv: + signal_python(self.root_logger, pinfo.name, pid) trapped_exceptions = [] # Dump all processes, except python & java. for pinfo in [pinfo for pinfo in processes if not re.match("^(java|python)", pinfo.name)]: - process_logger = self._get_process_logger(pinfo) try: dumpers.dbg.dump_info( - self.root_logger, process_logger, pinfo, self.options.dump_core + pinfo, self.options.dump_core and _check_dump_quota(max_dump_size_bytes, dumpers.dbg.get_dump_ext())) except Exception as err: # pylint: disable=broad-except self.root_logger.info("Error encountered when invoking debugger %s", err) @@ -83,21 +83,23 @@ class HangAnalyzer(Subcommand): # Dump java processes using jstack. for pinfo in [pinfo for pinfo in processes if pinfo.name.startswith("java")]: - process_logger = self._get_process_logger(pinfo) - try: - dumpers.jstack.dump_info(self.root_logger, pinfo.pid) - except Exception as err: # pylint: disable=broad-except - self.root_logger.info("Error encountered when invoking debugger %s", err) - trapped_exceptions.append(traceback.format_exc()) + for pid in pinfo.pidv: + try: + dumpers.jstack.dump_info(self.root_logger, self.options.debugger_output, + pinfo.name, pid) + except Exception as err: # pylint: disable=broad-except + self.root_logger.info("Error encountered when invoking debugger %s", err) + trapped_exceptions.append(traceback.format_exc()) # Signal go processes to ensure they print out stack traces, and die on POSIX OSes. # On Windows, this will simply kill the process since python emulates SIGABRT as # TerminateProcess. # Note: The stacktrace output may be captured elsewhere (i.e. resmoke). for pinfo in [pinfo for pinfo in processes if pinfo.name in self.go_processes]: - self.root_logger.info("Sending signal SIGABRT to go process %s with PID %d", pinfo.name, - pinfo.pid) - signal_process(self.root_logger, pinfo.pid, signal.SIGABRT) + for pid in pinfo.pidv: + self.root_logger.info("Sending signal SIGABRT to go process %s with PID %d", + pinfo.name, pid) + signal_process(self.root_logger, pid, signal.SIGABRT) self.root_logger.info("Done analyzing all processes for hangs") @@ -155,25 +157,6 @@ class HangAnalyzer(Subcommand): self.root_logger.warning( "Cannot determine Unix Current Login, not supported on Windows") - def _get_process_logger(self, pinfo): - """Return the process logger from options specified.""" - process_logger = logging.Logger("process", level=logging.DEBUG) - process_logger.mongo_process_filename = None - - if 'stdout' in self.options.debugger_output: - s_handler = logging.StreamHandler(sys.stdout) - s_handler.setFormatter(logging.Formatter(fmt="%(message)s")) - process_logger.addHandler(s_handler) - - if 'file' in self.options.debugger_output: - filename = "debugger_%s_%d.log" % (os.path.splitext(pinfo.name)[0], pinfo.pid) - process_logger.mongo_process_filename = filename - f_handler = logging.FileHandler(filename=filename, mode="w") - f_handler.setFormatter(logging.Formatter(fmt="%(message)s")) - process_logger.addHandler(f_handler) - - return process_logger - def _check_dump_quota(quota, ext): """Check if sum of the files with ext is within the specified quota in megabytes.""" diff --git a/buildscripts/resmokelib/hang_analyzer/process.py b/buildscripts/resmokelib/hang_analyzer/process.py index 0869b5ffcd7..bc5ccb3d55f 100644 --- a/buildscripts/resmokelib/hang_analyzer/process.py +++ b/buildscripts/resmokelib/hang_analyzer/process.py @@ -50,19 +50,23 @@ def callo(args, logger): return subprocess.check_output(args).decode('utf-8', 'replace') -def signal_python(logger, pinfo): - """Send appropriate dumping signal to python processes.""" +def signal_python(logger, pname, pid): + """ + Send appropriate dumping signal to python processes. + + :param logger: Where to log output + :param pname: name of the python process. + :param pid: python process pid to signal. + """ # On Windows, we set up an event object to wait on a signal. For Cygwin, we register # a signal handler to wait for the signal since it supports POSIX signals. if _IS_WINDOWS: - logger.info("Calling SetEvent to signal python process %s with PID %d", pinfo.name, - pinfo.pid) - signal_event_object(logger, pinfo.pid) + logger.info("Calling SetEvent to signal python process %s with PID %d", pname, pid) + signal_event_object(logger, pid) else: - logger.info("Sending signal SIGUSR1 to python process %s with PID %d", pinfo.name, - pinfo.pid) - signal_process(logger, pinfo.pid, signal.SIGUSR1) + logger.info("Sending signal SIGUSR1 to python process %s with PID %d", pname, pid) + signal_process(logger, pid, signal.SIGUSR1) def signal_event_object(logger, pid): diff --git a/buildscripts/resmokelib/hang_analyzer/process_list.py b/buildscripts/resmokelib/hang_analyzer/process_list.py index 3f60a54dd08..2fe14f84d68 100644 --- a/buildscripts/resmokelib/hang_analyzer/process_list.py +++ b/buildscripts/resmokelib/hang_analyzer/process_list.py @@ -4,11 +4,16 @@ import os import io import sys import csv -from collections import namedtuple +from typing import List, NamedTuple, Union from buildscripts.resmokelib.hang_analyzer.process import call, callo, find_program -Pinfo = namedtuple('Pinfo', ['pid', 'name']) + +class Pinfo(NamedTuple): + """Holds a vector of PIDs of the same process type.""" + + name: str + pidv: Union[int, List[int]] def get_processes(process_ids, interesting_processes, process_match, logger): @@ -22,35 +27,48 @@ def get_processes(process_ids, interesting_processes, process_match, logger): :param interesting_processes: List of process names to match on. :param process_match: String describing the process match to use. :param logger: Where to log output. + :param all_processes: List of all running (pid, process_name) pairs to search through. :return: A list Pinfo objects for matched processes. """ ps = _get_lister() - all_processes = ps.dump_processes(logger) # Canonicalize the process names to lowercase to handle cases where the name of the Python # process is /System/Library/.../Python on OS X and -p python is specified to the hang analyzer. - all_processes = [(pid, process_name.lower()) for (pid, process_name) in all_processes] + all_processes = [ + Pinfo(name=process_name.lower(), pidv=pid) for (pid, process_name) in all_processes + ] if process_ids: - processes = [ - Pinfo(pid=pid, name=pname) for (pid, pname) in all_processes - if pid in process_ids and pid != os.getpid() - ] - - running_pids = {pid for (pid, pname) in all_processes} + running_pids = {pidv for (pname, pidv) in all_processes} missing_pids = set(process_ids) - running_pids if missing_pids: logger.warning("The following requested process ids are not running %s", list(missing_pids)) - else: - processes = [ - Pinfo(pid=pid, name=pname) for (pid, pname) in all_processes - if _pname_match(process_match, pname, interesting_processes) and pid != os.getpid() - ] - logger.info("Found %d interesting processes %s", len(processes), processes) + processes_to_keep = [] + for process in all_processes: + if process.pidv == os.getpid(): + continue + + if process_ids and process.pidv not in process_ids: + continue + + if interesting_processes and not _pname_match(process_match, process.name, + interesting_processes): + continue + + processes_to_keep.append(process) + + process_types = {pname for (pname, _) in processes_to_keep} + processes = [ + Pinfo(name=ptype, pidv=[pidv for (pname, pidv) in processes_to_keep if pname == ptype]) + for ptype in process_types + ] + + logger.info("Found %d interesting processes %s", len(processes_to_keep), processes) + return processes diff --git a/buildscripts/tests/resmokelib/hang_analyzer/__init__.py b/buildscripts/tests/resmokelib/hang_analyzer/__init__.py new file mode 100644 index 00000000000..4b7a2bb941b --- /dev/null +++ b/buildscripts/tests/resmokelib/hang_analyzer/__init__.py @@ -0,0 +1 @@ +"""Empty.""" diff --git a/buildscripts/tests/resmokelib/hang_analyzer/test_process_list.py b/buildscripts/tests/resmokelib/hang_analyzer/test_process_list.py new file mode 100644 index 00000000000..28c65e7cb5a --- /dev/null +++ b/buildscripts/tests/resmokelib/hang_analyzer/test_process_list.py @@ -0,0 +1,127 @@ +"""Unit tests for the buildscripts.resmokelib.hang_analyzer.process_list module.""" + +import logging +import unittest + +from mock import Mock, patch + +from buildscripts.resmokelib.hang_analyzer.process_list import Pinfo, get_processes + +# # pylint: disable=missing-docstring + +NS = "buildscripts.resmokelib.hang_analyzer.process_list" + + +def ns(relative_name): # pylint: disable=invalid-name + """Return a full name from a name relative to the test module"s name space.""" + return NS + "." + relative_name + + +class TestGetProcesses(unittest.TestCase): + """Unit tests for the get_processes method.""" + + @patch(ns("os.getpid")) + @patch(ns("_get_lister")) + def test_interesting_processes(self, lister_mock, os_mock): + os_mock.return_value = -1 + lister_mock.return_value.dump_processes.return_value = [ + (1, "python"), + (2, "mongo"), + (3, "python"), + (4, "mongod"), + (5, "java") # this should be ignored. + ] + + process_ids = None + interesting_processes = ['python', 'mongo', 'mongod'] + process_match = "exact" + logger = Mock() + + processes = get_processes(process_ids, interesting_processes, process_match, logger) + + self.assertCountEqual(processes, [ + Pinfo(name="python", pidv=[1, 3]), + Pinfo(name="mongo", pidv=[2]), + Pinfo(name="mongod", pidv=[4]) + ]) + + @patch(ns("os.getpid")) + @patch(ns("_get_lister")) + def test_interesting_processes_and_process_ids(self, lister_mock, os_mock): + os_mock.return_value = -1 + lister_mock.return_value.dump_processes.return_value = [ + (1, "python"), + (2, "mongo"), + (3, "python"), + (4, "mongod"), + (5, "java") # this should be ignored. + ] + + process_ids = [1, 2, 5] + interesting_processes = ['python', 'mongo', 'mongod'] + process_match = "exact" + logger = Mock() + + processes = get_processes(process_ids, interesting_processes, process_match, logger) + + self.assertCountEqual(processes, [ + Pinfo(name="python", pidv=[1]), + Pinfo(name="mongo", pidv=[2]), + ]) + + @patch(ns("os.getpid")) + @patch(ns("_get_lister")) + def test_interesting_processes_contains(self, lister_mock, os_mock): + os_mock.return_value = -1 + lister_mock.return_value.dump_processes.return_value = [ + (1, "python2"), + (2, "mongo"), + (3, "python3"), + (4, "mongod"), + (5, "python"), + (5, "java") # this should be ignored. + ] + + process_ids = None + interesting_processes = ['python', 'mongo', 'mongod'] + process_match = "contains" + logger = Mock() + + processes = get_processes(process_ids, interesting_processes, process_match, logger) + + self.assertCountEqual(processes, [ + Pinfo(name="python", pidv=[5]), + Pinfo(name="python2", pidv=[1]), + Pinfo(name="python3", pidv=[3]), + Pinfo(name="mongo", pidv=[2]), + Pinfo(name="mongod", pidv=[4]) + ]) + + @patch(ns("os.getpid")) + @patch(ns("_get_lister")) + def test_process_ids(self, lister_mock, os_mock): + os_mock.return_value = -1 + lister_mock.return_value.dump_processes.return_value = [ + (1, "python"), + (2, "mongo"), + (3, "python"), + (4, "mongod"), + (5, "mongod"), + (6, "python"), # rest is ignored + (7, "mongod"), + (8, "mongo"), + (9, "java"), + ] + + process_ids = [1, 2, 3, 4, 5] + interesting_processes = [] + process_match = "exact" + logger = Mock() + + processes = get_processes(process_ids, interesting_processes, process_match, logger) + + self.assertCountEqual(processes, [ + Pinfo(name="python", pidv=[1, 3]), + Pinfo(name="mongo", pidv=[2]), + Pinfo(name="mongod", pidv=[4, 5]) + ]) |