summaryrefslogtreecommitdiff
path: root/buildscripts/hang_analyzer.py
diff options
context:
space:
mode:
authorJonathan Abrahams <jonathan@mongodb.com>2016-06-15 14:55:39 -0400
committerJonathan Abrahams <jonathan@mongodb.com>2016-06-15 14:55:58 -0400
commit46e47e5aac5bfdf7434fa11127320010f05ef352 (patch)
tree293e1a627edb52a26f73d78019655b3060d7970b /buildscripts/hang_analyzer.py
parent4dc7c842c2c7fb883fe711612a9b81fc72d23520 (diff)
downloadmongo-46e47e5aac5bfdf7434fa11127320010f05ef352.tar.gz
SERVER-24420 Add Jepsen processes to hang_analyzer
Diffstat (limited to 'buildscripts/hang_analyzer.py')
-rwxr-xr-xbuildscripts/hang_analyzer.py80
1 files changed, 61 insertions, 19 deletions
diff --git a/buildscripts/hang_analyzer.py b/buildscripts/hang_analyzer.py
index bdf6e5be0f3..38f2b9f5a03 100755
--- a/buildscripts/hang_analyzer.py
+++ b/buildscripts/hang_analyzer.py
@@ -2,11 +2,12 @@
"""Hang Analyzer
-A prototype hang analyzer for MCI integration to help investigate test timeouts
+A prototype hang analyzer for Evergreen integration to help investigate test timeouts
1. Script supports taking dumps, and/or dumping a summary of useful information about a process
-2. Script will iterate through a list of interesting processes (mongo, mongod, and mongos),
- and run the tools from step 1.
+2. Script will iterate through a list of interesting processes,
+ and run the tools from step 1. The list of processes can be provided as an option.
+3. Java processes will be dumped using jstack, if available.
Supports Linux, MacOS X, Solaris, and Windows.
"""
@@ -16,6 +17,7 @@ import csv
import itertools
import os
import platform
+import re
import signal
import subprocess
import sys
@@ -356,31 +358,61 @@ class SolarisProcessList(object):
return p
+# jstack is a JDK utility
+class JstackDumper(object):
+
+ def __find_debugger(self):
+ """Finds the installed jstack debugger"""
+ return find_program('jstack', ['/usr/bin'])
+
+ def dump_info(self, pid, process_name, stream):
+ """Dump java thread stack traces to the console"""
+ jstack = self.__find_debugger()
+
+ if jstack is None:
+ stream.write("WARNING: Debugger jstack not found, skipping dumping of %d\n" % (pid))
+ return
+
+ stream.write("INFO: Debugger %s, analyzing %d\n" % (jstack, pid))
+
+ call([jstack, "-l", str(pid)])
+
+ stream.write("INFO: Done analyzing process\n")
+
+ def dump_core(self, pid, output_file):
+ """Take a dump of pid to specified file"""
+ sys.stderr.write("ERROR: jstack does not support dumps\n")
+
+
+# jstack is a JDK utility
+class JstackWindowsDumper(object):
+
+ def dump_info(self, pid, process_name, stream):
+ """Dump java thread stack traces to the console"""
+
+ stream.write("WARNING: Debugger jstack not supported, skipping dumping of %d\n" % (pid))
+
def get_hang_analyzers():
dbg = None
+ jstack = None
ps = None
if sys.platform.startswith("linux"):
dbg = GDBDumper()
+ jstack = JstackDumper()
ps = LinuxProcessList()
elif sys.platform.startswith("sunos"):
dbg = GDBDumper()
+ jstack = JstackDumper()
ps = SolarisProcessList()
elif os.name == 'nt' or (os.name == "posix" and sys.platform == "cygwin"):
dbg = WindowsDumper()
+ jstack = JstackWindowsDumper()
ps = WindowsProcessList()
elif sys.platform == "darwin":
dbg = LLDBDumper()
+ jstack = JstackDumper()
ps = DarwinProcessList()
- return [ps, dbg]
-
-
-interesting_processes = ["mongo", "mongod", "mongos", "_test", "dbtest", "python"]
-
-def is_interesting_process(p):
- for ip in interesting_processes:
- if p.find(ip) != -1:
- return True
- return False
+ return [ps, dbg, jstack]
def signal_process(pid):
@@ -432,12 +464,16 @@ def main():
except AttributeError:
print "Cannot determine Unix Current Login, not supported on Windows"
+ interesting_processes = ["mongo", "mongod", "mongos", "_test", "dbtest", "python", "java"]
parser = OptionParser(description=__doc__)
+ parser.add_option('-p', '--process_names', dest='process_names', help='List of process names to analyze')
(options, args) = parser.parse_args()
+ if options.process_names is not None:
+ interesting_processes = options.process_names.split(',')
- [ps, dbg] = get_hang_analyzers()
+ [ps, dbg, jstack] = get_hang_analyzers()
- if( ps == None or dbg == None):
+ if( ps == None or (dbg == None and jstack == None)):
sys.stderr.write("hang_analyzer.py: Unsupported platform: %s\n" % (sys.platform))
exit(1)
@@ -447,9 +483,9 @@ def main():
processes_orig = ps.dump_processes()
- # Pick unit tests which include the phrase "_test"
+ # Find all running interesting processes.
processes = [a for a in processes_orig
- if is_interesting_process(a[1]) and a[0] != os.getpid()]
+ if any([a[1].find(ip) >= 0 for ip in interesting_processes]) and a[0] != os.getpid()]
sys.stdout.write("Found %d interesting processes\n" % len(processes))
@@ -457,11 +493,17 @@ def main():
for process in processes_orig:
sys.stdout.write("Ignoring process %d of %s\n" % (process[0], process[1]))
else:
- # Dump all other processes first since signaling the python script interrupts it
- for process in [a for a in processes if not a[1].startswith("python")]:
+ # Dump all other processes, except python & java.
+ for process in [a for a in processes if not re.match("^(java|python)", a[1])]:
sys.stdout.write("Dumping process %d of %s\n" % (process[0], process[1]))
dbg.dump_info(process[0], process[1], sys.stdout)
+ # Dump java processes using jstack.
+ for process in [a for a in processes if a[1].startswith("java")]:
+ sys.stdout.write("Dumping process %d of %s\n" % (process[0], process[1]))
+ jstack.dump_info(process[0], process[1], sys.stdout)
+
+ # Dump python processes after signalling them.
for process in [a for a in processes if a[1].startswith("python")]:
signal_process(process[0])