diff options
author | Andy Schwerin <schwerin@mongodb.com> | 2015-12-18 18:20:18 -0500 |
---|---|---|
committer | Andy Schwerin <schwerin@mongodb.com> | 2015-12-30 13:24:03 -0500 |
commit | fffc3c12ac811b6481a0669fc620f4230a5ce2cb (patch) | |
tree | aaac74aab8e57534a0d7efa5902299c7cd18ee84 /buildscripts/mongosymb.py | |
parent | b45dba8425b49714c5ca1394aba0bb97029c2560 (diff) | |
download | mongo-fffc3c12ac811b6481a0669fc620f4230a5ce2cb.tar.gz |
SERVER-22035 Introduce mongosymb.py stack trace symbolizer.
Diffstat (limited to 'buildscripts/mongosymb.py')
-rwxr-xr-x | buildscripts/mongosymb.py | 183 |
1 files changed, 183 insertions, 0 deletions
#!/usr/bin/env python
# Contents of buildscripts/mongosymb.py as added by the diff
# (new file mode 100755, index 00000000000..174a805b412).
"""Script and library for symbolizing MongoDB stack traces.

To use as a script, paste the JSON object on the line after ----- BEGIN BACKTRACE ----- into the
standard input of this script.  There are numerous caveats.  In the default mode, you need
to pass in the path to the executable being symbolized, and if you want shared library stack
traces, you must be on the same system.

There is largely untested support for extracting debug information from S3 buckets.  This work
is experimental.

Sample usage:

mongosymb.py --symbolizer-path=/path/to/llvm-symbolizer /path/to/executable </file/with/stacktrace

You can also pass --output-format=json, to get rich json output.  It shows some extra information,
but emits json instead of plain text.
"""

import json
import optparse
import os
import subprocess
import sys


def _make_base_addr_map(somap_list):
    """Makes a map from binary load address to description of library from the somap, which is
    a list of dictionaries describing individual loaded libraries.
    """
    return dict((so_entry["b"], so_entry) for so_entry in somap_list)


def _extract_symbols(stream):
    """Extracts symbol information for one CODE request from the output of llvm-symbolizer.

    "stream" is the binary stdout pipe of the symbolizer process.

    Returns a list of dictionaries, each of which has fn, file, line and column entries.

    The format of llvm-symbolizer output is that for every CODE line of input,
    it outputs zero or more pairs of lines, and then a blank line.  This way, if
    a CODE line of input maps to several inlined functions, you can use the blank
    line to find the end of the list of symbols corresponding to the CODE line.

    The first line of each pair contains the function name, and the second contains the
    location as "file:line:column".
    """
    result = []
    expecting_location = False
    while True:
        text = stream.readline().decode("utf-8", "replace")
        # A blank line ends the record; an empty read means the symbolizer closed its output.
        if text == "\n" or not text:
            break
        if not expecting_location:
            result.append({"fn": text.strip()})
            expecting_location = True
        else:
            # Split from the right on exactly two colons so that colons inside the file
            # name (for example a drive letter) stay part of the path.
            file_name, line_no, column = text.strip().rsplit(":", 2)
            result[-1].update({"file": file_name, "column": int(column), "line": int(line_no)})
            expecting_location = False
    if expecting_location:
        # The output stopped between a function name and its location; drop the partial
        # entry so every returned dictionary has all of its keys.
        result.pop()
    return result


def symbolize_frames(trace_doc, dbg_path_resolver, symbolizer_path=None, dsym_hint=None):
    """Given a trace_doc in MongoDB stack dump format, returns a list of symbolized stack frames.

    trace_doc must provide trace_doc["processInfo"]["somap"] (a list of loaded-library
    descriptions keyed by their "b" entry) and trace_doc["backtrace"] (a list of frames with
    "b" and "o" hexadecimal strings and an optional "s" symbol).

    dbg_path_resolver is any object with a get_dbg_file(soinfo) method returning the path of
    the file to symbolize against, or None when no such file is available.

    symbolizer_path defaults to the MONGOSYMB_SYMBOLIZER_PATH environment variable, falling
    back to "llvm-symbolizer".  dsym_hint is an optional list of values passed to the
    symbolizer as -dsym-hint arguments.

    Each returned frame is a dictionary with path, buildId, offset, addr and symbol entries.
    Frames whose path is not None additionally get a "symbinfo" list, as produced from the
    symbolizer output.  The symbolizer is only started when at least one frame has a path.
    """
    if symbolizer_path is None:
        symbolizer_path = os.environ.get("MONGOSYMB_SYMBOLIZER_PATH", "llvm-symbolizer")
    if dsym_hint is None:
        dsym_hint = []

    base_addr_map = _make_base_addr_map(trace_doc["processInfo"]["somap"])

    frames = []
    for frame in trace_doc["backtrace"]:
        soinfo = base_addr_map.get(frame["b"], {})
        # NOTE(review): the values presumably follow ELF e_type (2 = ET_EXEC, 3 = ET_DYN);
        # confirm against the producer of the somap.
        elf_type = soinfo.get("elfType", 0)
        if elf_type == 3:
            addr_base = "0"
        elif elf_type == 2:
            addr_base = frame["b"]
        else:
            addr_base = soinfo.get("vmaddr", "0")
        # int() parses arbitrarily large hexadecimal strings on both Python 2 and Python 3.
        addr = int(addr_base, 16) + int(frame["o"], 16)
        frames.append(dict(path=dbg_path_resolver.get_dbg_file(soinfo),
                           buildId=soinfo.get("buildId", None),
                           offset=frame["o"],
                           addr=addr,
                           symbol=frame.get("s", None)))

    resolvable = [frame for frame in frames if frame["path"] is not None]
    if not resolvable:
        # Nothing to ask the symbolizer about, so do not require it to be installed.
        return frames

    symbolizer_args = [symbolizer_path]
    symbolizer_args.extend("-dsym-hint=%s" % dh for dh in dsym_hint)

    # The symbolizer's diagnostics are discarded; the sink must be writable and is closed
    # again once the process has finished.
    with open(os.devnull, "w") as devnull:
        symbolizer_process = subprocess.Popen(
            args=symbolizer_args,
            close_fds=True,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=devnull)
        try:
            for frame in resolvable:
                request = "CODE %(path)s 0x%(addr)X\n" % frame
                if not isinstance(request, bytes):
                    # The pipe is binary: text strings must be encoded before writing.
                    request = request.encode("utf-8")
                symbolizer_process.stdin.write(request)
                # One request at a time: flush, then read the complete answer.
                symbolizer_process.stdin.flush()
                frame["symbinfo"] = _extract_symbols(symbolizer_process.stdout)
        finally:
            symbolizer_process.stdin.close()
            symbolizer_process.stdout.close()
            symbolizer_process.wait()
    return frames


class path_dbg_file_resolver(object):
    """Resolves debug files from the "path" entry of a library description, falling back to
    a caller-supplied guess for the path of the binary.
    """

    def __init__(self, bin_path_guess):
        self._bin_path_guess = bin_path_guess

    def get_dbg_file(self, soinfo):
        """Returns soinfo["path"] if present, otherwise the path guess given at construction."""
        return soinfo.get("path", self._bin_path_guess)


class s3_buildid_dbg_file_resolver(object):
    """Resolves debug files by build id, downloading "<buildId>.debug.gz" from an S3 bucket
    into a local cache directory when the file is not cached yet.
    """

    def __init__(self, cache_dir, s3_bucket):
        self._cache_dir = cache_dir
        self._s3_bucket = s3_bucket

    def get_dbg_file(self, soinfo):
        """Returns the path of the cached debug file for soinfo["buildId"], or None if the
        library has no build id or the file could not be obtained.
        """
        buildId = soinfo.get("buildId", None)
        if buildId is None:
            return None
        buildId = buildId.lower()
        buildIdPath = os.path.join(self._cache_dir, buildId + ".debug")
        if not os.path.exists(buildIdPath):
            try:
                self._get_from_s3(buildId)
            except (OSError, subprocess.CalledProcessError) as ex:
                # Best effort: a failed download means "no debug symbols", not a crash.
                sys.stderr.write("Failed to find debug symbols for %s in s3: %s\n" % (buildId, ex))
                return None
        if not os.path.exists(buildIdPath):
            return None
        return buildIdPath

    def _get_from_s3(self, buildId):
        """Downloads and unpacks the debug file for buildId into the cache directory.

        Runs wget and gunzip; raises OSError if a tool cannot be started and
        subprocess.CalledProcessError if one exits with a non-zero status.
        """
        subprocess.check_call(
            ['wget', 'https://s3.amazonaws.com/%s/%s.debug.gz' % (self._s3_bucket, buildId)],
            cwd=self._cache_dir)
        subprocess.check_call(['gunzip', buildId + ".debug.gz"], cwd=self._cache_dir)


def classic_output(frames, outfile, **kwargs):
    """Writes frames to outfile as plain text, one line per symbolized (possibly inlined)
    function in the form " file:line fn".

    Frames without symbol information are written as " path!!!" instead.  Extra keyword
    arguments are accepted and ignored so that the call signature matches json.dump.
    """
    for frame in frames:
        # Frames without a debug file never receive a "symbinfo" entry.
        symbinfo = frame.get("symbinfo")
        if symbinfo:
            for sframe in symbinfo:
                outfile.write(" %(file)s:%(line)s %(fn)s\n" % sframe)
        else:
            outfile.write(" %(path)s!!!\n" % frame)


def main(argv):
    """Command-line entry point.

    argv is the full argument vector including the program name.  The positional arguments
    after the program name are passed to the constructor of the selected debug file resolver.
    Reads the trace document as JSON from standard input and writes the symbolized frames to
    standard output.  Exits with status 1 on an invalid --debug-file-resolver or
    --output-format value.
    """
    parser = optparse.OptionParser()
    parser.add_option("--dsym-hint", action="append", dest="dsym_hint")
    parser.add_option("--symbolizer-path", dest="symbolizer_path", default=None)
    parser.add_option("--debug-file-resolver", dest="debug_file_resolver", default="path")
    parser.add_option("--output-format", dest="output_format", default="classic")
    (options, args) = parser.parse_args(argv)
    resolver_constructor = dict(path=path_dbg_file_resolver, s3=s3_buildid_dbg_file_resolver).get(
        options.debug_file_resolver, None)
    if resolver_constructor is None:
        sys.stderr.write("Invalid debug-file-resolver argument: %s\n" % options.debug_file_resolver)
        sys.exit(1)

    output_fn = dict(json=json.dump, classic=classic_output).get(options.output_format, None)
    if output_fn is None:
        sys.stderr.write("Invalid output-format argument: %s\n" % options.output_format)
        sys.exit(1)

    # args[0] is the program name because the whole argv was handed to the parser.
    resolver = resolver_constructor(*args[1:])
    trace_doc = json.load(sys.stdin)
    frames = symbolize_frames(trace_doc,
                              resolver,
                              symbolizer_path=options.symbolizer_path,
                              dsym_hint=options.dsym_hint)
    output_fn(frames, sys.stdout, indent=2)


if __name__ == '__main__':
    main(sys.argv)
    sys.exit(0)