diff options
author | Zeno Albisser <zeno.albisser@digia.com> | 2013-08-15 21:46:11 +0200 |
---|---|---|
committer | Zeno Albisser <zeno.albisser@digia.com> | 2013-08-15 21:46:11 +0200 |
commit | 679147eead574d186ebf3069647b4c23e8ccace6 (patch) | |
tree | fc247a0ac8ff119f7c8550879ebb6d3dd8d1ff69 /chromium/tools/find_runtime_symbols | |
download | qtwebengine-chromium-679147eead574d186ebf3069647b4c23e8ccace6.tar.gz |
Initial import.
Diffstat (limited to 'chromium/tools/find_runtime_symbols')
-rw-r--r-- | chromium/tools/find_runtime_symbols/OWNERS | 1 | ||||
-rw-r--r-- | chromium/tools/find_runtime_symbols/PRESUBMIT.py | 45 | ||||
-rw-r--r-- | chromium/tools/find_runtime_symbols/README | 24 | ||||
-rwxr-xr-x | chromium/tools/find_runtime_symbols/find_runtime_symbols.py | 207 | ||||
-rwxr-xr-x | chromium/tools/find_runtime_symbols/prepare_symbol_info.py | 226 | ||||
-rw-r--r-- | chromium/tools/find_runtime_symbols/proc_maps.py | 125 | ||||
-rwxr-xr-x | chromium/tools/find_runtime_symbols/reduce_debugline.py | 68 | ||||
-rw-r--r-- | chromium/tools/find_runtime_symbols/static_symbols.py | 277 |
8 files changed, 973 insertions, 0 deletions
diff --git a/chromium/tools/find_runtime_symbols/OWNERS b/chromium/tools/find_runtime_symbols/OWNERS new file mode 100644 index 00000000000..aeea00ec3e0 --- /dev/null +++ b/chromium/tools/find_runtime_symbols/OWNERS @@ -0,0 +1 @@ +dmikurube@chromium.org diff --git a/chromium/tools/find_runtime_symbols/PRESUBMIT.py b/chromium/tools/find_runtime_symbols/PRESUBMIT.py new file mode 100644 index 00000000000..8d6889ce3f9 --- /dev/null +++ b/chromium/tools/find_runtime_symbols/PRESUBMIT.py @@ -0,0 +1,45 @@ +# Copyright (c) 2012 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Top-level presubmit script for find_runtime_symbols. + +See http://dev.chromium.org/developers/how-tos/depottools/presubmit-scripts for +details on the presubmit API built into gcl. +""" + + +def CommonChecks(input_api, output_api): + import sys + def join(*args): + return input_api.os_path.join(input_api.PresubmitLocalPath(), *args) + + output = [] + sys_path_backup = sys.path + try: + sys.path = [ + join('..', 'find_runtime_symbols'), + ] + sys.path + output.extend(input_api.canned_checks.RunPylint(input_api, output_api)) + finally: + sys.path = sys_path_backup + + output.extend( + input_api.canned_checks.RunUnitTestsInDirectory( + input_api, output_api, + input_api.os_path.join(input_api.PresubmitLocalPath(), 'tests'), + whitelist=[r'.+_test\.py$'])) + + if input_api.is_committing: + output.extend(input_api.canned_checks.PanProjectChecks(input_api, + output_api, + owners_check=False)) + return output + + +def CheckChangeOnUpload(input_api, output_api): + return CommonChecks(input_api, output_api) + + +def CheckChangeOnCommit(input_api, output_api): + return CommonChecks(input_api, output_api) diff --git a/chromium/tools/find_runtime_symbols/README b/chromium/tools/find_runtime_symbols/README new file mode 100644 index 00000000000..ee5c2ac88ca --- /dev/null +++ 
b/chromium/tools/find_runtime_symbols/README @@ -0,0 +1,24 @@ +This script maps runtime addresses to symbol names. It is robust over +Address Space Layout Randomization (ASLR) since it uses runtime addresses with +runtime mapping information (/proc/.../maps). +Like 'pprof --symbols' in gperftools <http://code.google.com/p/gperftools/>. + + +Step 1: Prepare symbol information. + +It is required to collect symbol information before mapping runtime addresses +to symbol names. + +./prepare_symbol_info.py /path/to/maps [/another/path/to/symbol_info_dir] + +The required 'maps' file is /proc/.../maps of the process at runtime. + + +Step 2: Find symbols. + +./find_runtime_symbols.py /path/to/symbol_info_dir < addresses.txt + +'symbol_info_dir' is the result of the Step 1. +The stdin should be a list of hex addresses to map, one per line. + +The results will be printed to stdout like 'pprof --symbols'. diff --git a/chromium/tools/find_runtime_symbols/find_runtime_symbols.py b/chromium/tools/find_runtime_symbols/find_runtime_symbols.py new file mode 100755 index 00000000000..bed9e800b17 --- /dev/null +++ b/chromium/tools/find_runtime_symbols/find_runtime_symbols.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python +# Copyright (c) 2012 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Find symbols in a binary corresponding to given runtime virtual addresses. + +Note that source file names are treated as symbols in this script while they +are actually not. 
+""" + +import json +import logging +import os +import sys + +from static_symbols import StaticSymbolsInFile +from proc_maps import ProcMaps + +try: + from collections import OrderedDict # pylint: disable=E0611 +except ImportError: + BASE_PATH = os.path.dirname(os.path.abspath(__file__)) + SIMPLEJSON_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir, 'third_party') + sys.path.insert(0, SIMPLEJSON_PATH) + from simplejson import OrderedDict + + +FUNCTION_SYMBOLS = 0 +SOURCEFILE_SYMBOLS = 1 +TYPEINFO_SYMBOLS = 2 + +_MAPS_FILENAME = 'maps' +_FILES_FILENAME = 'files.json' + + +class RuntimeSymbolsInProcess(object): + def __init__(self): + self._maps = None + self._static_symbols_in_filse = {} + + def find_procedure(self, runtime_address): + for vma in self._maps.iter(ProcMaps.executable): + if vma.begin <= runtime_address < vma.end: + static_symbols = self._static_symbols_in_filse.get(vma.name) + if static_symbols: + return static_symbols.find_procedure_by_runtime_address( + runtime_address, vma) + else: + return None + return None + + def find_sourcefile(self, runtime_address): + for vma in self._maps.iter(ProcMaps.executable): + if vma.begin <= runtime_address < vma.end: + static_symbols = self._static_symbols_in_filse.get(vma.name) + if static_symbols: + return static_symbols.find_sourcefile_by_runtime_address( + runtime_address, vma) + else: + return None + return None + + def find_typeinfo(self, runtime_address): + for vma in self._maps.iter(ProcMaps.constants): + if vma.begin <= runtime_address < vma.end: + static_symbols = self._static_symbols_in_filse.get(vma.name) + if static_symbols: + return static_symbols.find_typeinfo_by_runtime_address( + runtime_address, vma) + else: + return None + return None + + @staticmethod + def load(prepared_data_dir): + symbols_in_process = RuntimeSymbolsInProcess() + + with open(os.path.join(prepared_data_dir, _MAPS_FILENAME), mode='r') as f: + symbols_in_process._maps = ProcMaps.load(f) + with 
open(os.path.join(prepared_data_dir, _FILES_FILENAME), mode='r') as f: + files = json.load(f) + + # pylint: disable=W0212 + for vma in symbols_in_process._maps.iter(ProcMaps.executable_and_constants): + file_entry = files.get(vma.name) + if not file_entry: + continue + + static_symbols = StaticSymbolsInFile(vma.name) + + nm_entry = file_entry.get('nm') + if nm_entry and nm_entry['format'] == 'bsd': + with open(os.path.join(prepared_data_dir, nm_entry['file']), 'r') as f: + static_symbols.load_nm_bsd(f, nm_entry['mangled']) + + readelf_entry = file_entry.get('readelf-e') + if readelf_entry: + with open(os.path.join(prepared_data_dir, readelf_entry['file']), + 'r') as f: + static_symbols.load_readelf_ew(f) + + decodedline_file_entry = file_entry.get('readelf-debug-decodedline-file') + if decodedline_file_entry: + with open(os.path.join(prepared_data_dir, + decodedline_file_entry['file']), 'r') as f: + static_symbols.load_readelf_debug_decodedline_file(f) + + symbols_in_process._static_symbols_in_filse[vma.name] = static_symbols + + return symbols_in_process + + +def _find_runtime_function_symbols(symbols_in_process, addresses): + result = OrderedDict() + for address in addresses: + if isinstance(address, basestring): + address = int(address, 16) + found = symbols_in_process.find_procedure(address) + if found: + result[address] = found.name + else: + result[address] = '0x%016x' % address + return result + + +def _find_runtime_sourcefile_symbols(symbols_in_process, addresses): + result = OrderedDict() + for address in addresses: + if isinstance(address, basestring): + address = int(address, 16) + found = symbols_in_process.find_sourcefile(address) + if found: + result[address] = found + else: + result[address] = '' + return result + + +def _find_runtime_typeinfo_symbols(symbols_in_process, addresses): + result = OrderedDict() + for address in addresses: + if isinstance(address, basestring): + address = int(address, 16) + if address == 0: + result[address] = 'no 
typeinfo' + else: + found = symbols_in_process.find_typeinfo(address) + if found: + if found.startswith('typeinfo for '): + result[address] = found[13:] + else: + result[address] = found + else: + result[address] = '0x%016x' % address + return result + + +_INTERNAL_FINDERS = { + FUNCTION_SYMBOLS: _find_runtime_function_symbols, + SOURCEFILE_SYMBOLS: _find_runtime_sourcefile_symbols, + TYPEINFO_SYMBOLS: _find_runtime_typeinfo_symbols, + } + + +def find_runtime_symbols(symbol_type, symbols_in_process, addresses): + return _INTERNAL_FINDERS[symbol_type](symbols_in_process, addresses) + + +def main(): + # FIX: Accept only .pre data + if len(sys.argv) < 2: + sys.stderr.write("""Usage: +%s /path/to/prepared_data_dir/ < addresses.txt +""" % sys.argv[0]) + return 1 + + log = logging.getLogger('find_runtime_symbols') + log.setLevel(logging.WARN) + handler = logging.StreamHandler() + handler.setLevel(logging.WARN) + formatter = logging.Formatter('%(message)s') + handler.setFormatter(formatter) + log.addHandler(handler) + + prepared_data_dir = sys.argv[1] + if not os.path.exists(prepared_data_dir): + log.warn("Nothing found: %s" % prepared_data_dir) + return 1 + if not os.path.isdir(prepared_data_dir): + log.warn("Not a directory: %s" % prepared_data_dir) + return 1 + + symbols_in_process = RuntimeSymbolsInProcess.load(prepared_data_dir) + symbols_dict = find_runtime_symbols(FUNCTION_SYMBOLS, + symbols_in_process, + sys.stdin) + for address, symbol in symbols_dict: + if symbol: + print '%016x %s' % (address, symbol) + else: + print '%016x' % address + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/chromium/tools/find_runtime_symbols/prepare_symbol_info.py b/chromium/tools/find_runtime_symbols/prepare_symbol_info.py new file mode 100755 index 00000000000..d5503881a1c --- /dev/null +++ b/chromium/tools/find_runtime_symbols/prepare_symbol_info.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python +# Copyright (c) 2012 The Chromium Authors. All rights reserved. 
+# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import hashlib +import json +import logging +import os +import re +import shutil +import subprocess +import sys +import tempfile + +from proc_maps import ProcMaps + + +BASE_PATH = os.path.dirname(os.path.abspath(__file__)) +REDUCE_DEBUGLINE_PATH = os.path.join(BASE_PATH, 'reduce_debugline.py') +LOGGER = logging.getLogger('prepare_symbol_info') + + +def _dump_command_result(command, output_dir_path, basename, suffix): + handle_out, filename_out = tempfile.mkstemp( + suffix=suffix, prefix=basename + '.', dir=output_dir_path) + handle_err, filename_err = tempfile.mkstemp( + suffix=suffix + '.err', prefix=basename + '.', dir=output_dir_path) + error = False + try: + subprocess.check_call( + command, stdout=handle_out, stderr=handle_err, shell=True) + except (OSError, subprocess.CalledProcessError): + error = True + finally: + os.close(handle_err) + os.close(handle_out) + + if os.path.exists(filename_err): + if LOGGER.getEffectiveLevel() <= logging.DEBUG: + with open(filename_err, 'r') as f: + for line in f: + LOGGER.debug(line.rstrip()) + os.remove(filename_err) + + if os.path.exists(filename_out) and ( + os.path.getsize(filename_out) == 0 or error): + os.remove(filename_out) + return None + + if not os.path.exists(filename_out): + return None + + return filename_out + + +def prepare_symbol_info(maps_path, + output_dir_path=None, + alternative_dirs=None, + use_tempdir=False, + use_source_file_name=False): + """Prepares (collects) symbol information files for find_runtime_symbols. + + 1) If |output_dir_path| is specified, it tries collecting symbol information + files in the given directory |output_dir_path|. + 1-a) If |output_dir_path| doesn't exist, create the directory and use it. + 1-b) If |output_dir_path| is an empty directory, use it. 
+ 1-c) If |output_dir_path| is a directory which has 'files.json', assumes that + files are already collected and just ignores it. + 1-d) Otherwise, depends on |use_tempdir|. + + 2) If |output_dir_path| is not specified, it tries to create a new directory + depending on 'maps_path'. + + If it cannot create a new directory, creates a temporary directory depending + on |use_tempdir|. If |use_tempdir| is False, returns None. + + Args: + maps_path: A path to a file which contains '/proc/<pid>/maps'. + alternative_dirs: A mapping from a directory '/path/on/target' where the + target process runs to a directory '/path/on/host' where the script + reads the binary. Considered to be used for Android binaries. + output_dir_path: A path to a directory where files are prepared. + use_tempdir: If True, it creates a temporary directory when it cannot + create a new directory. + use_source_file_name: If True, it adds reduced result of 'readelf -wL' + to find source file names. + + Returns: + A pair of a path to the prepared directory and a boolean representing + if it created a temporary directory or not. + """ + alternative_dirs = alternative_dirs or {} + if not output_dir_path: + matched = re.match('^(.*)\.maps$', os.path.basename(maps_path)) + if matched: + output_dir_path = matched.group(1) + '.pre' + if not output_dir_path: + matched = re.match('^/proc/(.*)/maps$', os.path.realpath(maps_path)) + if matched: + output_dir_path = matched.group(1) + '.pre' + if not output_dir_path: + output_dir_path = os.path.basename(maps_path) + '.pre' + # TODO(dmikurube): Find another candidate for output_dir_path. + + used_tempdir = False + LOGGER.info('Data for profiling will be collected in "%s".' % output_dir_path) + if os.path.exists(output_dir_path): + if os.path.isdir(output_dir_path) and not os.listdir(output_dir_path): + LOGGER.warn('Using an empty existing directory "%s".' % output_dir_path) + else: + LOGGER.warn('A file or a directory exists at "%s".' 
% output_dir_path) + if os.path.exists(os.path.join(output_dir_path, 'files.json')): + LOGGER.warn('Using the existing directory "%s".' % output_dir_path) + return output_dir_path, used_tempdir + else: + if use_tempdir: + output_dir_path = tempfile.mkdtemp() + used_tempdir = True + LOGGER.warn('Using a temporary directory "%s".' % output_dir_path) + else: + LOGGER.warn('The directory "%s" is not available.' % output_dir_path) + return None, used_tempdir + else: + LOGGER.info('Creating a new directory "%s".' % output_dir_path) + try: + os.mkdir(output_dir_path) + except OSError: + LOGGER.warn('A directory "%s" cannot be created.' % output_dir_path) + if use_tempdir: + output_dir_path = tempfile.mkdtemp() + used_tempdir = True + LOGGER.warn('Using a temporary directory "%s".' % output_dir_path) + else: + LOGGER.warn('The directory "%s" is not available.' % output_dir_path) + return None, used_tempdir + + shutil.copyfile(maps_path, os.path.join(output_dir_path, 'maps')) + + with open(maps_path, mode='r') as f: + maps = ProcMaps.load(f) + + LOGGER.debug('Listing up symbols.') + files = {} + for entry in maps.iter(ProcMaps.executable): + LOGGER.debug(' %016x-%016x +%06x %s' % ( + entry.begin, entry.end, entry.offset, entry.name)) + binary_path = entry.name + for target_path, host_path in alternative_dirs.iteritems(): + if entry.name.startswith(target_path): + binary_path = entry.name.replace(target_path, host_path, 1) + nm_filename = _dump_command_result( + 'nm -n --format bsd %s | c++filt' % binary_path, + output_dir_path, os.path.basename(binary_path), '.nm') + if not nm_filename: + continue + readelf_e_filename = _dump_command_result( + 'readelf -eW %s' % binary_path, + output_dir_path, os.path.basename(binary_path), '.readelf-e') + if not readelf_e_filename: + continue + readelf_debug_decodedline_file = None + if use_source_file_name: + readelf_debug_decodedline_file = _dump_command_result( + 'readelf -wL %s | %s' % (binary_path, REDUCE_DEBUGLINE_PATH), + 
output_dir_path, os.path.basename(binary_path), '.readelf-wL') + + files[entry.name] = {} + files[entry.name]['nm'] = { + 'file': os.path.basename(nm_filename), + 'format': 'bsd', + 'mangled': False} + files[entry.name]['readelf-e'] = { + 'file': os.path.basename(readelf_e_filename)} + if readelf_debug_decodedline_file: + files[entry.name]['readelf-debug-decodedline-file'] = { + 'file': os.path.basename(readelf_debug_decodedline_file)} + + files[entry.name]['size'] = os.stat(binary_path).st_size + + with open(binary_path, 'rb') as entry_f: + md5 = hashlib.md5() + sha1 = hashlib.sha1() + chunk = entry_f.read(1024 * 1024) + while chunk: + md5.update(chunk) + sha1.update(chunk) + chunk = entry_f.read(1024 * 1024) + files[entry.name]['sha1'] = sha1.hexdigest() + files[entry.name]['md5'] = md5.hexdigest() + + with open(os.path.join(output_dir_path, 'files.json'), 'w') as f: + json.dump(files, f, indent=2, sort_keys=True) + + LOGGER.info('Collected symbol information at "%s".' % output_dir_path) + return output_dir_path, used_tempdir + + +def main(): + if not sys.platform.startswith('linux'): + sys.stderr.write('This script work only on Linux.') + return 1 + + LOGGER.setLevel(logging.DEBUG) + handler = logging.StreamHandler() + handler.setLevel(logging.INFO) + formatter = logging.Formatter('%(message)s') + handler.setFormatter(formatter) + LOGGER.addHandler(handler) + + # TODO(dmikurube): Specify |alternative_dirs| from command line. 
+ if len(sys.argv) < 2: + sys.stderr.write("""Usage: +%s /path/to/maps [/path/to/output_data_dir/] +""" % sys.argv[0]) + return 1 + elif len(sys.argv) == 2: + result, _ = prepare_symbol_info(sys.argv[1]) + else: + result, _ = prepare_symbol_info(sys.argv[1], sys.argv[2]) + + return not result + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/chromium/tools/find_runtime_symbols/proc_maps.py b/chromium/tools/find_runtime_symbols/proc_maps.py new file mode 100644 index 00000000000..2d917b32124 --- /dev/null +++ b/chromium/tools/find_runtime_symbols/proc_maps.py @@ -0,0 +1,125 @@ +# Copyright (c) 2012 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import re + + +_MAPS_PATTERN = re.compile( + r'^([a-f0-9]+)-([a-f0-9]+)\s+(.)(.)(.)(.)\s+([a-f0-9]+)\s+(\S+):(\S+)\s+' + r'(\d+)\s*(.*)$', re.IGNORECASE) + + +class ProcMapsEntry(object): + """A class representing one line in /proc/.../maps.""" + + def __init__( + self, begin, end, readable, writable, executable, private, offset, + major, minor, inode, name): + self.begin = begin + self.end = end + self.readable = readable + self.writable = writable + self.executable = executable + self.private = private + self.offset = offset + self.major = major + self.minor = minor + self.inode = inode + self.name = name + + def as_dict(self): + return { + 'begin': self.begin, + 'end': self.end, + 'readable': self.readable, + 'writable': self.writable, + 'executable': self.executable, + 'private': self.private, + 'offset': self.offset, + 'major': self.major, + 'minor': self.minor, + 'inode': self.inode, + 'name': self.name, + } + + +class ProcMaps(object): + """A class representing contents in /proc/.../maps.""" + + def __init__(self): + self._sorted_indexes = [] + self._dictionary = {} + self._sorted = True + + def iter(self, condition): + if not self._sorted: + self._sorted_indexes.sort() + self._sorted = True + for index 
in self._sorted_indexes: + if not condition or condition(self._dictionary[index]): + yield self._dictionary[index] + + def __iter__(self): + if not self._sorted: + self._sorted_indexes.sort() + self._sorted = True + for index in self._sorted_indexes: + yield self._dictionary[index] + + @staticmethod + def load(f): + table = ProcMaps() + for line in f: + table.append_line(line) + return table + + def append_line(self, line): + entry = self.parse_line(line) + if entry: + self._append_entry(entry) + + @staticmethod + def parse_line(line): + matched = _MAPS_PATTERN.match(line) + if matched: + return ProcMapsEntry( # pylint: disable=W0212 + int(matched.group(1), 16), # begin + int(matched.group(2), 16), # end + matched.group(3), # readable + matched.group(4), # writable + matched.group(5), # executable + matched.group(6), # private + int(matched.group(7), 16), # offset + matched.group(8), # major + matched.group(9), # minor + int(matched.group(10), 10), # inode + matched.group(11) # name + ) + else: + return None + + @staticmethod + def constants(entry): + return (entry.writable == '-' and entry.executable == '-' and re.match( + '\S+(\.(so|dll|dylib|bundle)|chrome)((\.\d+)+\w*(\.\d+){0,3})?', + entry.name)) + + @staticmethod + def executable(entry): + return (entry.executable == 'x' and re.match( + '\S+(\.(so|dll|dylib|bundle)|chrome)((\.\d+)+\w*(\.\d+){0,3})?', + entry.name)) + + @staticmethod + def executable_and_constants(entry): + return (((entry.writable == '-' and entry.executable == '-') or + entry.executable == 'x') and re.match( + '\S+(\.(so|dll|dylib|bundle)|chrome)((\.\d+)+\w*(\.\d+){0,3})?', + entry.name)) + + def _append_entry(self, entry): + if self._sorted_indexes and self._sorted_indexes[-1] > entry.begin: + self._sorted = False + self._sorted_indexes.append(entry.begin) + self._dictionary[entry.begin] = entry diff --git a/chromium/tools/find_runtime_symbols/reduce_debugline.py b/chromium/tools/find_runtime_symbols/reduce_debugline.py new file mode 
#!/usr/bin/env python
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Reduces result of 'readelf -wL' to just a list of starting addresses.

It lists all addresses where the corresponding source file changes. The
list is sorted in ascending order. See tests/reduce_debugline_test.py for
examples.

This script assumes that the result of 'readelf -wL' ends with an empty line.

Note: the option '-wL' has the same meaning as '--debug-dump=decodedline'.
"""

import re
import sys


# Matches a header line such as 'CU: path/to/file.cc:' or 'path/to/file.h:'.
# Group 2 is the file name without the trailing colon.
_FILENAME_PATTERN = re.compile(r'(CU: |)(.+):')


def reduce_decoded_debugline(input_file):
    """Collect the first address that follows each file-name header line.

    Args:
        input_file: An iterable of text lines (e.g. a file object) holding
            the output of 'readelf -wL'.

    Returns:
        A list of (address, filename) tuples sorted by address. Consecutive
        entries that name the same file are merged into the first one.
    """
    filename = ''
    starting_dict = {}
    started = False

    for line in input_file:
        line = line.strip()
        unpacked = line.split(None, 2)

        if len(unpacked) == 3 and unpacked[2].startswith('0x'):
            # A line-table row: '<file> <line number> <0xaddress>'. Only the
            # first row after a header is recorded.
            if not started and filename:
                started = True
                starting_dict[int(unpacked[2], 16)] = filename
        else:
            # Any other line ends the current run of rows; a line ending
            # with ':' may announce the next file name.
            started = False
            if line.endswith(':'):
                matched = _FILENAME_PATTERN.match(line)
                if matched:
                    filename = matched.group(2)

    starting_list = []
    prev_filename = ''
    for address in sorted(starting_dict):
        curr_filename = starting_dict[address]
        if prev_filename != curr_filename:
            starting_list.append((address, curr_filename))
        prev_filename = curr_filename
    return starting_list


def main():
    """Read 'readelf -wL' output from stdin and print the reduced list.

    Returns:
        0 on success (including when the input has no line entries), 1 when
        command line arguments are given.
    """
    if len(sys.argv) != 1:
        sys.stderr.write('Unsupported arguments\n')
        return 1

    starting_list = reduce_decoded_debugline(sys.stdin)
    if not starting_list:
        # Nothing to print. Indexing starting_list[-1] below would raise
        # IndexError on an empty list.
        return 0

    # The list is sorted, so the last address decides the column width.
    width = 16 if starting_list[-1][0] > 0xffffffff else 8
    for address, filename in starting_list:
        # sys.stdout.write keeps the module importable on both Python 2 and
        # Python 3; the emitted text is the same as the former print output.
        sys.stdout.write('%0*x %s\n' % (width, address, filename))
    return 0


if __name__ == '__main__':
    sys.exit(main())
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Static symbol tables loaded from 'nm' and 'readelf' output of a binary."""

import bisect
import re


_ARGUMENT_TYPE_PATTERN = re.compile(r'\([^()]*\)(\s*const)?')
_TEMPLATE_ARGUMENT_PATTERN = re.compile(r'<[^<>]*>')
_LEADING_TYPE_PATTERN = re.compile(r'^.*\s+(\w+::)')
_READELF_SECTION_HEADER_PATTER = re.compile(
    r'^\s*\[\s*(Nr|\d+)\]\s+(|\S+)\s+([A-Z_]+)\s+([0-9a-f]+)\s+'
    r'([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9]+)\s+([WAXMSILGxOop]*)\s+'
    r'([0-9]+)\s+([0-9]+)\s+([0-9]+)')


class ParsingException(Exception):
    """Raised when a line of tool output cannot be parsed."""

    def __str__(self):
        return repr(self.args[0])


class AddressMapping(object):
    """A mapping from exact start addresses to entries."""

    def __init__(self):
        self._symbol_map = {}

    def append(self, start, entry):
        """Register |entry| at |start|, replacing any previous entry."""
        self._symbol_map[start] = entry

    def find(self, address):
        """Return the entry registered exactly at |address|, or None."""
        return self._symbol_map.get(address)


class RangeAddressMapping(AddressMapping):
    """A mapping in which each entry covers addresses after its start."""

    def __init__(self):
        super(RangeAddressMapping, self).__init__()
        self._sorted_start_list = []
        self._is_sorted = True

    def append(self, start, entry):
        """Register |entry| at |start|.

        An entry whose start equals the most recently appended start is
        ignored, so the first entry appended for that address wins.
        """
        if self._sorted_start_list:
            last_start = self._sorted_start_list[-1]
            if last_start > start:
                # Out-of-order insertion; find() sorts lazily.
                self._is_sorted = False
            elif last_start == start:
                return
        self._sorted_start_list.append(start)
        self._symbol_map[start] = entry

    def find(self, address):
        """Return the entry with the greatest start below |address|.

        Returns None when the mapping is empty or when no start is smaller
        than |address|.
        """
        if not self._sorted_start_list:
            return None
        if not self._is_sorted:
            self._sorted_start_list.sort()
            self._is_sorted = True
        # NOTE(review): bisect_left makes an address equal to a start map to
        # the preceding entry. Presumably intended for return addresses --
        # confirm before changing it.
        found_index = bisect.bisect_left(self._sorted_start_list, address)
        if found_index == 0:
            # No start precedes |address|. Indexing with found_index - 1
            # would wrap around to the last (highest) entry.
            return None
        return self._symbol_map[self._sorted_start_list[found_index - 1]]


class Procedure(object):
    """A class for a procedure symbol and an address range for the symbol."""

    def __init__(self, start, end, name):
        self.start = start
        self.end = end
        self.name = name

    def __eq__(self, other):
        return (self.start == other.start and
                self.end == other.end and
                self.name == other.name)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __str__(self):
        return '%x-%x: %s' % (self.start, self.end, self.name)


class ElfSection(object):
    """A class for an elf section header."""

    def __init__(
            self, number, name, stype, address, offset, size, es, flg, lk,
            inf, al):
        self.number = number
        self.name = name
        self.stype = stype
        self.address = address
        self.offset = offset
        self.size = size
        self.es = es
        self.flg = flg
        self.lk = lk
        self.inf = inf
        self.al = al

    def __eq__(self, other):
        return (self.number == other.number and
                self.name == other.name and
                self.stype == other.stype and
                self.address == other.address and
                self.offset == other.offset and
                self.size == other.size and
                self.es == other.es and
                self.flg == other.flg and
                self.lk == other.lk and
                self.inf == other.inf and
                self.al == other.al)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __str__(self):
        return '%x+%x(%x) %s' % (
            self.address, self.size, self.offset, self.name)


class StaticSymbolsInFile(object):
    """Represents static symbol information in a binary file."""

    def __init__(self, my_name):
        self.my_name = my_name
        self._elf_sections = []
        self._procedures = RangeAddressMapping()
        self._sourcefiles = RangeAddressMapping()
        self._typeinfos = AddressMapping()

    def _append_elf_section(self, elf_section):
        self._elf_sections.append(elf_section)

    def _append_procedure(self, start, procedure):
        self._procedures.append(start, procedure)

    def _append_sourcefile(self, start, sourcefile):
        self._sourcefiles.append(start, sourcefile)

    def _append_typeinfo(self, start, typeinfo):
        self._typeinfos.append(start, typeinfo)

    def _find_symbol_by_runtime_address(self, address, vma, target):
        """Translate a runtime |address| and look it up in |target|.

        Args:
            address: A runtime virtual address.
            vma: An object with begin, end, offset and name attributes that
                describes the mapped region containing |address|.
            target: The mapping to search (one of the mappings of self).

        Returns:
            The entry found in |target|, or None when |address| is outside
            |vma|, |vma| is not this file, or no loaded section covers the
            corresponding file offset.
        """
        if not (vma.begin <= address < vma.end):
            return None

        if vma.name != self.my_name:
            return None

        file_offset = address - (vma.begin - vma.offset)
        elf_address = None
        # The loop does not stop at the first hit: when several sections
        # cover the offset, the last one in load order is used.
        for section in self._elf_sections:
            if section.offset <= file_offset < (section.offset + section.size):
                elf_address = section.address + file_offset - section.offset
        if not elf_address:
            return None

        return target.find(elf_address)

    def find_procedure_by_runtime_address(self, address, vma):
        return self._find_symbol_by_runtime_address(
            address, vma, self._procedures)

    def find_sourcefile_by_runtime_address(self, address, vma):
        return self._find_symbol_by_runtime_address(
            address, vma, self._sourcefiles)

    def find_typeinfo_by_runtime_address(self, address, vma):
        return self._find_symbol_by_runtime_address(
            address, vma, self._typeinfos)

    def load_readelf_ew(self, f):
        """Load section headers from the text output of readelf.

        Lines before 'Section Headers:' are skipped. Reading stops at
        'Key to Flags:' or 'Program Headers:'. Nothing is loaded when the
        'Section Headers:' line is missing.
        """
        found_header = False
        for line in f:
            if line.rstrip() == 'Section Headers:':
                found_header = True
                break
        if not found_header:
            return None

        for line in f:
            line = line.rstrip()
            matched = _READELF_SECTION_HEADER_PATTER.match(line)
            if matched:
                self._append_elf_section(ElfSection(
                    int(matched.group(1), 10),  # number
                    matched.group(2),  # name
                    matched.group(3),  # stype
                    int(matched.group(4), 16),  # address
                    int(matched.group(5), 16),  # offset
                    int(matched.group(6), 16),  # size
                    matched.group(7),  # es
                    matched.group(8),  # flg
                    matched.group(9),  # lk
                    matched.group(10),  # inf
                    matched.group(11)  # al
                    ))
            elif line in ('Key to Flags:', 'Program Headers:'):
                break

    def load_readelf_debug_decodedline_file(self, input_file):
        """Load '<hex address> <source file>' lines into the sourcefile map.

        Lines with fewer than two fields (such as blank lines) are skipped.
        """
        for line in input_file:
            splitted = line.rstrip().split(None, 2)
            if len(splitted) < 2:
                continue
            self._append_sourcefile(int(splitted[0], 16), splitted[1])

    @staticmethod
    def _parse_nm_bsd_line(line):
        """Split a line of BSD-format nm output into (value, type, name).

        The value column is 8 characters wide, or 16 when the 8-character
        layout does not fit.

        Raises:
            ParsingException: |line| matches neither layout, including lines
                too short to hold a value column.
        """
        if len(line) > 8 and line[8] == ' ':
            return line[0:8], line[9:10], line[11:]
        elif len(line) > 16 and line[16] == ' ':
            return line[0:16], line[17:18], line[19:]
        raise ParsingException('Invalid nm output.')

    @staticmethod
    def _get_short_function_name(function):
        """Strip argument lists, template arguments and the leading type."""
        # Both removals are repeated until nothing changes so that nested
        # parentheses and angle brackets are peeled off from the inside out.
        while True:
            function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)
            if not number:
                break
        while True:
            function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)
            if not number:
                break
        return _LEADING_TYPE_PATTERN.sub(r'\g<1>', function)

    def load_nm_bsd(self, f, mangled=False):
        """Load procedure and typeinfo symbols from BSD-format nm output.

        Args:
            f: An iterable of lines sorted by address (as by 'nm -n').
            mangled: If True, names are kept as they are; no typeinfo symbols
                are recorded and names are not shortened.

        Raises:
            ParsingException: A non-blank line is not valid nm output.
        """
        last_start = 0
        routine = ''

        for line in f:
            line = line.rstrip()
            if not line:
                # A blank line carries no symbol.
                continue
            sym_value, sym_type, sym_name = self._parse_nm_bsd_line(line)

            # A blank value column means the symbol has no address.
            if sym_value[0] == ' ':
                continue

            start_val = int(sym_value, 16)

            if (sym_type in ('r', 'R', 'D', 'U', 'd', 'V') and
                    (not mangled and sym_name.startswith('typeinfo'))):
                self._append_typeinfo(start_val, sym_name)

            # It's possible for two symbols to share the same address, if
            # one is a zero-length variable (like __start_google_malloc) or
            # one symbol is a weak alias to another (like __libc_malloc).
            # In such cases, we want to ignore all values except for the
            # actual symbol, which in nm-speak has type "T". The logic
            # below does this, though it's a bit tricky: what happens when
            # we have a series of lines with the same address, is the first
            # one gets queued up to be processed. However, it won't
            # *actually* be processed until later, when we read a line with
            # a different address. That means that as long as we're reading
            # lines with the same address, we have a chance to replace that
            # item in the queue, which we do whenever we see a 'T' entry --
            # that is, a line with type 'T'. If we never see a 'T' entry,
            # we'll just go ahead and process the first entry (which never
            # got touched in the queue), and ignore the others.
            if start_val == last_start and sym_type in ('t', 'T'):
                # We are the 'T' symbol at this address, replace previous
                # symbol.
                routine = sym_name
                continue
            elif start_val == last_start:
                # We're not the 'T' symbol at this address, so ignore us.
                continue

            # Tag this routine with the starting address in case the image
            # has multiple occurrences of this routine. We use a syntax
            # that resembles template parameters, which are automatically
            # stripped out by _get_short_function_name().
            sym_name += "<%016x>" % start_val

            # The queued routine ends where the current symbol starts.
            if not mangled:
                routine = self._get_short_function_name(routine)
            self._append_procedure(
                last_start, Procedure(last_start, start_val, routine))

            last_start = start_val
            routine = sym_name

        # Flush the last queued routine; its end is unknown, so the start is
        # reused as the end.
        if not mangled:
            routine = self._get_short_function_name(routine)
        self._append_procedure(
            last_start, Procedure(last_start, last_start, routine))