summaryrefslogtreecommitdiff
path: root/chromium/tools/find_runtime_symbols
diff options
context:
space:
mode:
authorZeno Albisser <zeno.albisser@digia.com>2013-08-15 21:46:11 +0200
committerZeno Albisser <zeno.albisser@digia.com>2013-08-15 21:46:11 +0200
commit679147eead574d186ebf3069647b4c23e8ccace6 (patch)
treefc247a0ac8ff119f7c8550879ebb6d3dd8d1ff69 /chromium/tools/find_runtime_symbols
downloadqtwebengine-chromium-679147eead574d186ebf3069647b4c23e8ccace6.tar.gz
Initial import.
Diffstat (limited to 'chromium/tools/find_runtime_symbols')
-rw-r--r--chromium/tools/find_runtime_symbols/OWNERS1
-rw-r--r--chromium/tools/find_runtime_symbols/PRESUBMIT.py45
-rw-r--r--chromium/tools/find_runtime_symbols/README24
-rwxr-xr-xchromium/tools/find_runtime_symbols/find_runtime_symbols.py207
-rwxr-xr-xchromium/tools/find_runtime_symbols/prepare_symbol_info.py226
-rw-r--r--chromium/tools/find_runtime_symbols/proc_maps.py125
-rwxr-xr-xchromium/tools/find_runtime_symbols/reduce_debugline.py68
-rw-r--r--chromium/tools/find_runtime_symbols/static_symbols.py277
8 files changed, 973 insertions, 0 deletions
diff --git a/chromium/tools/find_runtime_symbols/OWNERS b/chromium/tools/find_runtime_symbols/OWNERS
new file mode 100644
index 00000000000..aeea00ec3e0
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/OWNERS
@@ -0,0 +1 @@
+dmikurube@chromium.org
diff --git a/chromium/tools/find_runtime_symbols/PRESUBMIT.py b/chromium/tools/find_runtime_symbols/PRESUBMIT.py
new file mode 100644
index 00000000000..8d6889ce3f9
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/PRESUBMIT.py
@@ -0,0 +1,45 @@
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Top-level presubmit script for find_runtime_symbols.
+
+See http://dev.chromium.org/developers/how-tos/depottools/presubmit-scripts for
+details on the presubmit API built into gcl.
+"""
+
+
+def CommonChecks(input_api, output_api):
+ import sys
+ def join(*args):
+ return input_api.os_path.join(input_api.PresubmitLocalPath(), *args)
+
+ output = []
+ sys_path_backup = sys.path
+ try:
+ sys.path = [
+ join('..', 'find_runtime_symbols'),
+ ] + sys.path
+ output.extend(input_api.canned_checks.RunPylint(input_api, output_api))
+ finally:
+ sys.path = sys_path_backup
+
+ output.extend(
+ input_api.canned_checks.RunUnitTestsInDirectory(
+ input_api, output_api,
+ input_api.os_path.join(input_api.PresubmitLocalPath(), 'tests'),
+ whitelist=[r'.+_test\.py$']))
+
+ if input_api.is_committing:
+ output.extend(input_api.canned_checks.PanProjectChecks(input_api,
+ output_api,
+ owners_check=False))
+ return output
+
+
+def CheckChangeOnUpload(input_api, output_api):
+ return CommonChecks(input_api, output_api)
+
+
+def CheckChangeOnCommit(input_api, output_api):
+ return CommonChecks(input_api, output_api)
diff --git a/chromium/tools/find_runtime_symbols/README b/chromium/tools/find_runtime_symbols/README
new file mode 100644
index 00000000000..ee5c2ac88ca
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/README
@@ -0,0 +1,24 @@
+This script maps runtime addresses to symbol names. It is robust against
+Address Space Layout Randomization (ASLR) because it resolves runtime addresses
+using runtime mapping information (/proc/.../maps).
+It works like 'pprof --symbols' in gperftools <http://code.google.com/p/gperftools/>.
+
+
+Step 1: Prepare symbol information.
+
+It is required to collect symbol information before mapping runtime addresses
+to symbol names.
+
+./prepare_symbol_info.py /path/to/maps [/another/path/to/symbol_info_dir]
+
+The required 'maps' file is /proc/.../maps of the process at runtime.
+
+
+Step 2: Find symbols.
+
+./find_runtime_symbols.py /path/to/symbol_info_dir < addresses.txt
+
+'symbol_info_dir' is the directory produced in Step 1.
+The stdin should be a list of hex addresses to map, one per line.
+
+The results will be printed to stdout like 'pprof --symbols'.
diff --git a/chromium/tools/find_runtime_symbols/find_runtime_symbols.py b/chromium/tools/find_runtime_symbols/find_runtime_symbols.py
new file mode 100755
index 00000000000..bed9e800b17
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/find_runtime_symbols.py
@@ -0,0 +1,207 @@
+#!/usr/bin/env python
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Find symbols in a binary corresponding to given runtime virtual addresses.
+
+Note that source file names are treated as symbols in this script while they
+are actually not.
+"""
+
+import json
+import logging
+import os
+import sys
+
+from static_symbols import StaticSymbolsInFile
+from proc_maps import ProcMaps
+
+try:
+ from collections import OrderedDict # pylint: disable=E0611
+except ImportError:
+ BASE_PATH = os.path.dirname(os.path.abspath(__file__))
+ SIMPLEJSON_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir, 'third_party')
+ sys.path.insert(0, SIMPLEJSON_PATH)
+ from simplejson import OrderedDict
+
+
+FUNCTION_SYMBOLS = 0
+SOURCEFILE_SYMBOLS = 1
+TYPEINFO_SYMBOLS = 2
+
+_MAPS_FILENAME = 'maps'
+_FILES_FILENAME = 'files.json'
+
+
+class RuntimeSymbolsInProcess(object):
+ def __init__(self):
+ self._maps = None
+ self._static_symbols_in_filse = {}
+
+ def find_procedure(self, runtime_address):
+ for vma in self._maps.iter(ProcMaps.executable):
+ if vma.begin <= runtime_address < vma.end:
+ static_symbols = self._static_symbols_in_filse.get(vma.name)
+ if static_symbols:
+ return static_symbols.find_procedure_by_runtime_address(
+ runtime_address, vma)
+ else:
+ return None
+ return None
+
+ def find_sourcefile(self, runtime_address):
+ for vma in self._maps.iter(ProcMaps.executable):
+ if vma.begin <= runtime_address < vma.end:
+ static_symbols = self._static_symbols_in_filse.get(vma.name)
+ if static_symbols:
+ return static_symbols.find_sourcefile_by_runtime_address(
+ runtime_address, vma)
+ else:
+ return None
+ return None
+
+ def find_typeinfo(self, runtime_address):
+ for vma in self._maps.iter(ProcMaps.constants):
+ if vma.begin <= runtime_address < vma.end:
+ static_symbols = self._static_symbols_in_filse.get(vma.name)
+ if static_symbols:
+ return static_symbols.find_typeinfo_by_runtime_address(
+ runtime_address, vma)
+ else:
+ return None
+ return None
+
+ @staticmethod
+ def load(prepared_data_dir):
+ symbols_in_process = RuntimeSymbolsInProcess()
+
+ with open(os.path.join(prepared_data_dir, _MAPS_FILENAME), mode='r') as f:
+ symbols_in_process._maps = ProcMaps.load(f)
+ with open(os.path.join(prepared_data_dir, _FILES_FILENAME), mode='r') as f:
+ files = json.load(f)
+
+ # pylint: disable=W0212
+ for vma in symbols_in_process._maps.iter(ProcMaps.executable_and_constants):
+ file_entry = files.get(vma.name)
+ if not file_entry:
+ continue
+
+ static_symbols = StaticSymbolsInFile(vma.name)
+
+ nm_entry = file_entry.get('nm')
+ if nm_entry and nm_entry['format'] == 'bsd':
+ with open(os.path.join(prepared_data_dir, nm_entry['file']), 'r') as f:
+ static_symbols.load_nm_bsd(f, nm_entry['mangled'])
+
+ readelf_entry = file_entry.get('readelf-e')
+ if readelf_entry:
+ with open(os.path.join(prepared_data_dir, readelf_entry['file']),
+ 'r') as f:
+ static_symbols.load_readelf_ew(f)
+
+ decodedline_file_entry = file_entry.get('readelf-debug-decodedline-file')
+ if decodedline_file_entry:
+ with open(os.path.join(prepared_data_dir,
+ decodedline_file_entry['file']), 'r') as f:
+ static_symbols.load_readelf_debug_decodedline_file(f)
+
+ symbols_in_process._static_symbols_in_filse[vma.name] = static_symbols
+
+ return symbols_in_process
+
+
+def _find_runtime_function_symbols(symbols_in_process, addresses):
+ result = OrderedDict()
+ for address in addresses:
+ if isinstance(address, basestring):
+ address = int(address, 16)
+ found = symbols_in_process.find_procedure(address)
+ if found:
+ result[address] = found.name
+ else:
+ result[address] = '0x%016x' % address
+ return result
+
+
+def _find_runtime_sourcefile_symbols(symbols_in_process, addresses):
+ result = OrderedDict()
+ for address in addresses:
+ if isinstance(address, basestring):
+ address = int(address, 16)
+ found = symbols_in_process.find_sourcefile(address)
+ if found:
+ result[address] = found
+ else:
+ result[address] = ''
+ return result
+
+
+def _find_runtime_typeinfo_symbols(symbols_in_process, addresses):
+ result = OrderedDict()
+ for address in addresses:
+ if isinstance(address, basestring):
+ address = int(address, 16)
+ if address == 0:
+ result[address] = 'no typeinfo'
+ else:
+ found = symbols_in_process.find_typeinfo(address)
+ if found:
+ if found.startswith('typeinfo for '):
+ result[address] = found[13:]
+ else:
+ result[address] = found
+ else:
+ result[address] = '0x%016x' % address
+ return result
+
+
+_INTERNAL_FINDERS = {
+ FUNCTION_SYMBOLS: _find_runtime_function_symbols,
+ SOURCEFILE_SYMBOLS: _find_runtime_sourcefile_symbols,
+ TYPEINFO_SYMBOLS: _find_runtime_typeinfo_symbols,
+ }
+
+
+def find_runtime_symbols(symbol_type, symbols_in_process, addresses):
+ return _INTERNAL_FINDERS[symbol_type](symbols_in_process, addresses)
+
+
+def main():
+  """Maps hex addresses read from stdin to function symbols.
+
+  The first command-line argument is the prepared data directory. One line is
+  printed to stdout per resolved address: the address as 16 hex digits,
+  followed by the symbol when there is one.
+
+  Returns:
+    0 on success; 1 on a usage error, or when the given path does not exist
+    or is not a directory.
+  """
+  # TODO: Accept only .pre data
+  if len(sys.argv) < 2:
+    sys.stderr.write("""Usage:
+%s /path/to/prepared_data_dir/ < addresses.txt
+""" % sys.argv[0])
+    return 1
+
+  log = logging.getLogger('find_runtime_symbols')
+  log.setLevel(logging.WARN)
+  handler = logging.StreamHandler()
+  handler.setLevel(logging.WARN)
+  handler.setFormatter(logging.Formatter('%(message)s'))
+  log.addHandler(handler)
+
+  prepared_data_dir = sys.argv[1]
+  if not os.path.exists(prepared_data_dir):
+    log.warning('Nothing found: %s', prepared_data_dir)
+    return 1
+  if not os.path.isdir(prepared_data_dir):
+    log.warning('Not a directory: %s', prepared_data_dir)
+    return 1
+
+  symbols_in_process = RuntimeSymbolsInProcess.load(prepared_data_dir)
+  symbols_dict = find_runtime_symbols(FUNCTION_SYMBOLS,
+                                      symbols_in_process,
+                                      sys.stdin)
+  # Iterating a dict directly yields only its keys (the integer addresses),
+  # so unpacking (address, symbol) pairs requires items().
+  for address, symbol in symbols_dict.items():
+    if symbol:
+      print('%016x %s' % (address, symbol))
+    else:
+      print('%016x' % address)
+
+  return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/chromium/tools/find_runtime_symbols/prepare_symbol_info.py b/chromium/tools/find_runtime_symbols/prepare_symbol_info.py
new file mode 100755
index 00000000000..d5503881a1c
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/prepare_symbol_info.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env python
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import hashlib
+import json
+import logging
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+
+from proc_maps import ProcMaps
+
+
+BASE_PATH = os.path.dirname(os.path.abspath(__file__))
+REDUCE_DEBUGLINE_PATH = os.path.join(BASE_PATH, 'reduce_debugline.py')
+LOGGER = logging.getLogger('prepare_symbol_info')
+
+
+def _dump_command_result(command, output_dir_path, basename, suffix):
+ handle_out, filename_out = tempfile.mkstemp(
+ suffix=suffix, prefix=basename + '.', dir=output_dir_path)
+ handle_err, filename_err = tempfile.mkstemp(
+ suffix=suffix + '.err', prefix=basename + '.', dir=output_dir_path)
+ error = False
+ try:
+ subprocess.check_call(
+ command, stdout=handle_out, stderr=handle_err, shell=True)
+ except (OSError, subprocess.CalledProcessError):
+ error = True
+ finally:
+ os.close(handle_err)
+ os.close(handle_out)
+
+ if os.path.exists(filename_err):
+ if LOGGER.getEffectiveLevel() <= logging.DEBUG:
+ with open(filename_err, 'r') as f:
+ for line in f:
+ LOGGER.debug(line.rstrip())
+ os.remove(filename_err)
+
+ if os.path.exists(filename_out) and (
+ os.path.getsize(filename_out) == 0 or error):
+ os.remove(filename_out)
+ return None
+
+ if not os.path.exists(filename_out):
+ return None
+
+ return filename_out
+
+
+def prepare_symbol_info(maps_path,
+ output_dir_path=None,
+ alternative_dirs=None,
+ use_tempdir=False,
+ use_source_file_name=False):
+ """Prepares (collects) symbol information files for find_runtime_symbols.
+
+ 1) If |output_dir_path| is specified, it tries collecting symbol information
+ files in the given directory |output_dir_path|.
+ 1-a) If |output_dir_path| doesn't exist, create the directory and use it.
+ 1-b) If |output_dir_path| is an empty directory, use it.
+ 1-c) If |output_dir_path| is a directory which has 'files.json', assumes that
+ the files are already collected and returns the directory as it is.
+ 1-d) Otherwise, depends on |use_tempdir|.
+
+ 2) If |output_dir_path| is not specified, it tries to create a new directory
+ depending on 'maps_path'.
+
+ If it cannot create a new directory, creates a temporary directory depending
+ on |use_tempdir|. If |use_tempdir| is False, returns None.
+
+ Args:
+ maps_path: A path to a file which contains '/proc/<pid>/maps'.
+ alternative_dirs: A mapping from a directory '/path/on/target' where the
+ target process runs to a directory '/path/on/host' where the script
+ reads the binary. Considered to be used for Android binaries.
+ output_dir_path: A path to a directory where files are prepared.
+ use_tempdir: If True, it creates a temporary directory when it cannot
+ create a new directory.
+ use_source_file_name: If True, it adds reduced result of 'readelf -wL'
+ to find source file names.
+
+ Returns:
+ A pair of a path to the prepared directory and a boolean representing
+ if it created a temporary directory or not.
+ """
+ alternative_dirs = alternative_dirs or {}
+ if not output_dir_path:
+ matched = re.match('^(.*)\.maps$', os.path.basename(maps_path))
+ if matched:
+ output_dir_path = matched.group(1) + '.pre'
+ if not output_dir_path:
+ matched = re.match('^/proc/(.*)/maps$', os.path.realpath(maps_path))
+ if matched:
+ output_dir_path = matched.group(1) + '.pre'
+ if not output_dir_path:
+ output_dir_path = os.path.basename(maps_path) + '.pre'
+ # TODO(dmikurube): Find another candidate for output_dir_path.
+
+ used_tempdir = False
+ LOGGER.info('Data for profiling will be collected in "%s".' % output_dir_path)
+ if os.path.exists(output_dir_path):
+ if os.path.isdir(output_dir_path) and not os.listdir(output_dir_path):
+ LOGGER.warn('Using an empty existing directory "%s".' % output_dir_path)
+ else:
+ LOGGER.warn('A file or a directory exists at "%s".' % output_dir_path)
+ if os.path.exists(os.path.join(output_dir_path, 'files.json')):
+ LOGGER.warn('Using the existing directory "%s".' % output_dir_path)
+ return output_dir_path, used_tempdir
+ else:
+ if use_tempdir:
+ output_dir_path = tempfile.mkdtemp()
+ used_tempdir = True
+ LOGGER.warn('Using a temporary directory "%s".' % output_dir_path)
+ else:
+ LOGGER.warn('The directory "%s" is not available.' % output_dir_path)
+ return None, used_tempdir
+ else:
+ LOGGER.info('Creating a new directory "%s".' % output_dir_path)
+ try:
+ os.mkdir(output_dir_path)
+ except OSError:
+ LOGGER.warn('A directory "%s" cannot be created.' % output_dir_path)
+ if use_tempdir:
+ output_dir_path = tempfile.mkdtemp()
+ used_tempdir = True
+ LOGGER.warn('Using a temporary directory "%s".' % output_dir_path)
+ else:
+ LOGGER.warn('The directory "%s" is not available.' % output_dir_path)
+ return None, used_tempdir
+
+ shutil.copyfile(maps_path, os.path.join(output_dir_path, 'maps'))
+
+ with open(maps_path, mode='r') as f:
+ maps = ProcMaps.load(f)
+
+ LOGGER.debug('Listing up symbols.')
+ files = {}
+ for entry in maps.iter(ProcMaps.executable):
+ LOGGER.debug(' %016x-%016x +%06x %s' % (
+ entry.begin, entry.end, entry.offset, entry.name))
+ binary_path = entry.name
+ for target_path, host_path in alternative_dirs.iteritems():
+ if entry.name.startswith(target_path):
+ binary_path = entry.name.replace(target_path, host_path, 1)
+ nm_filename = _dump_command_result(
+ 'nm -n --format bsd %s | c++filt' % binary_path,
+ output_dir_path, os.path.basename(binary_path), '.nm')
+ if not nm_filename:
+ continue
+ readelf_e_filename = _dump_command_result(
+ 'readelf -eW %s' % binary_path,
+ output_dir_path, os.path.basename(binary_path), '.readelf-e')
+ if not readelf_e_filename:
+ continue
+ readelf_debug_decodedline_file = None
+ if use_source_file_name:
+ readelf_debug_decodedline_file = _dump_command_result(
+ 'readelf -wL %s | %s' % (binary_path, REDUCE_DEBUGLINE_PATH),
+ output_dir_path, os.path.basename(binary_path), '.readelf-wL')
+
+ files[entry.name] = {}
+ files[entry.name]['nm'] = {
+ 'file': os.path.basename(nm_filename),
+ 'format': 'bsd',
+ 'mangled': False}
+ files[entry.name]['readelf-e'] = {
+ 'file': os.path.basename(readelf_e_filename)}
+ if readelf_debug_decodedline_file:
+ files[entry.name]['readelf-debug-decodedline-file'] = {
+ 'file': os.path.basename(readelf_debug_decodedline_file)}
+
+ files[entry.name]['size'] = os.stat(binary_path).st_size
+
+ with open(binary_path, 'rb') as entry_f:
+ md5 = hashlib.md5()
+ sha1 = hashlib.sha1()
+ chunk = entry_f.read(1024 * 1024)
+ while chunk:
+ md5.update(chunk)
+ sha1.update(chunk)
+ chunk = entry_f.read(1024 * 1024)
+ files[entry.name]['sha1'] = sha1.hexdigest()
+ files[entry.name]['md5'] = md5.hexdigest()
+
+ with open(os.path.join(output_dir_path, 'files.json'), 'w') as f:
+ json.dump(files, f, indent=2, sort_keys=True)
+
+ LOGGER.info('Collected symbol information at "%s".' % output_dir_path)
+ return output_dir_path, used_tempdir
+
+
+def main():
+  """Collects symbol information for the maps file given on the command line.
+
+  The first argument is the path to the maps file; the optional second
+  argument is the output directory.
+
+  Returns:
+    0 when a prepared directory was obtained; 1 on an unsupported platform,
+    on a usage error, or when no directory could be prepared.
+  """
+  if not sys.platform.startswith('linux'):
+    sys.stderr.write('This script works only on Linux.\n')
+    return 1
+
+  LOGGER.setLevel(logging.DEBUG)
+  handler = logging.StreamHandler()
+  handler.setLevel(logging.INFO)
+  handler.setFormatter(logging.Formatter('%(message)s'))
+  LOGGER.addHandler(handler)
+
+  # TODO(dmikurube): Specify |alternative_dirs| from command line.
+  if len(sys.argv) < 2:
+    sys.stderr.write("""Usage:
+%s /path/to/maps [/path/to/output_data_dir/]
+""" % sys.argv[0])
+    return 1
+
+  # argv[2], when present, is the output directory; any further arguments
+  # are ignored.
+  result, _ = prepare_symbol_info(*sys.argv[1:3])
+
+  # An explicit integer exit status instead of a bool.
+  return 0 if result else 1
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/chromium/tools/find_runtime_symbols/proc_maps.py b/chromium/tools/find_runtime_symbols/proc_maps.py
new file mode 100644
index 00000000000..2d917b32124
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/proc_maps.py
@@ -0,0 +1,125 @@
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import re
+
+
+_MAPS_PATTERN = re.compile(
+ r'^([a-f0-9]+)-([a-f0-9]+)\s+(.)(.)(.)(.)\s+([a-f0-9]+)\s+(\S+):(\S+)\s+'
+ r'(\d+)\s*(.*)$', re.IGNORECASE)
+
+
+class ProcMapsEntry(object):
+ """A class representing one line in /proc/.../maps."""
+
+ def __init__(
+ self, begin, end, readable, writable, executable, private, offset,
+ major, minor, inode, name):
+ self.begin = begin
+ self.end = end
+ self.readable = readable
+ self.writable = writable
+ self.executable = executable
+ self.private = private
+ self.offset = offset
+ self.major = major
+ self.minor = minor
+ self.inode = inode
+ self.name = name
+
+ def as_dict(self):
+ return {
+ 'begin': self.begin,
+ 'end': self.end,
+ 'readable': self.readable,
+ 'writable': self.writable,
+ 'executable': self.executable,
+ 'private': self.private,
+ 'offset': self.offset,
+ 'major': self.major,
+ 'minor': self.minor,
+ 'inode': self.inode,
+ 'name': self.name,
+ }
+
+
+class ProcMaps(object):
+ """A class representing contents in /proc/.../maps."""
+
+ def __init__(self):
+ self._sorted_indexes = []
+ self._dictionary = {}
+ self._sorted = True
+
+ def iter(self, condition):
+ if not self._sorted:
+ self._sorted_indexes.sort()
+ self._sorted = True
+ for index in self._sorted_indexes:
+ if not condition or condition(self._dictionary[index]):
+ yield self._dictionary[index]
+
+ def __iter__(self):
+ if not self._sorted:
+ self._sorted_indexes.sort()
+ self._sorted = True
+ for index in self._sorted_indexes:
+ yield self._dictionary[index]
+
+ @staticmethod
+ def load(f):
+ table = ProcMaps()
+ for line in f:
+ table.append_line(line)
+ return table
+
+ def append_line(self, line):
+ entry = self.parse_line(line)
+ if entry:
+ self._append_entry(entry)
+
+ @staticmethod
+ def parse_line(line):
+ matched = _MAPS_PATTERN.match(line)
+ if matched:
+ return ProcMapsEntry( # pylint: disable=W0212
+ int(matched.group(1), 16), # begin
+ int(matched.group(2), 16), # end
+ matched.group(3), # readable
+ matched.group(4), # writable
+ matched.group(5), # executable
+ matched.group(6), # private
+ int(matched.group(7), 16), # offset
+ matched.group(8), # major
+ matched.group(9), # minor
+ int(matched.group(10), 10), # inode
+ matched.group(11) # name
+ )
+ else:
+ return None
+
+ @staticmethod
+ def constants(entry):
+ return (entry.writable == '-' and entry.executable == '-' and re.match(
+ '\S+(\.(so|dll|dylib|bundle)|chrome)((\.\d+)+\w*(\.\d+){0,3})?',
+ entry.name))
+
+ @staticmethod
+ def executable(entry):
+ return (entry.executable == 'x' and re.match(
+ '\S+(\.(so|dll|dylib|bundle)|chrome)((\.\d+)+\w*(\.\d+){0,3})?',
+ entry.name))
+
+ @staticmethod
+ def executable_and_constants(entry):
+ return (((entry.writable == '-' and entry.executable == '-') or
+ entry.executable == 'x') and re.match(
+ '\S+(\.(so|dll|dylib|bundle)|chrome)((\.\d+)+\w*(\.\d+){0,3})?',
+ entry.name))
+
+ def _append_entry(self, entry):
+ if self._sorted_indexes and self._sorted_indexes[-1] > entry.begin:
+ self._sorted = False
+ self._sorted_indexes.append(entry.begin)
+ self._dictionary[entry.begin] = entry
diff --git a/chromium/tools/find_runtime_symbols/reduce_debugline.py b/chromium/tools/find_runtime_symbols/reduce_debugline.py
new file mode 100755
index 00000000000..75c8c8578d7
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/reduce_debugline.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+# Copyright (c) 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Reduces result of 'readelf -wL' to just a list of starting addresses.
+
+It lists all addresses where the corresponding source file changes. The
+list is sorted in ascending order. See tests/reduce_debugline_test.py for
+examples.
+
+This script assumes that the result of 'readelf -wL' ends with an empty line.
+
+Note: the option '-wL' has the same meaning as '--debug-dump=decodedline'.
+"""
+
+import re
+import sys
+
+
+_FILENAME_PATTERN = re.compile('(CU: |)(.+)\:')
+
+
+def reduce_decoded_debugline(input_file):
+ filename = ''
+ starting_dict = {}
+ started = False
+
+ for line in input_file:
+ line = line.strip()
+ unpacked = line.split(None, 2)
+
+ if len(unpacked) == 3 and unpacked[2].startswith('0x'):
+ if not started and filename:
+ started = True
+ starting_dict[int(unpacked[2], 16)] = filename
+ else:
+ started = False
+ if line.endswith(':'):
+ matched = _FILENAME_PATTERN.match(line)
+ if matched:
+ filename = matched.group(2)
+
+ starting_list = []
+ prev_filename = ''
+ for address in sorted(starting_dict):
+ curr_filename = starting_dict[address]
+ if prev_filename != curr_filename:
+ starting_list.append((address, starting_dict[address]))
+ prev_filename = curr_filename
+ return starting_list
+
+
+def main():
+  """Reduces 'readelf -wL' output read from stdin and prints the result.
+
+  Each output line is a zero-padded hex starting address followed by the
+  source file name. Addresses are printed with 16 hex digits when the last
+  (largest) address does not fit in 32 bits, and with 8 digits otherwise.
+
+  Returns:
+    1 when any command-line argument is given, 0 otherwise.
+  """
+  if len(sys.argv) != 1:
+    sys.stderr.write('Unsupported arguments\n')
+    return 1
+
+  starting_list = reduce_decoded_debugline(sys.stdin)
+  if not starting_list:
+    # Nothing was found in the input, so there is nothing to print; indexing
+    # starting_list[-1] below would raise IndexError.
+    return 0
+
+  # The list is sorted in ascending order, so the last address is the largest.
+  if starting_list[-1][0] > 0xffffffff:
+    line_format = '%016x %s'
+  else:
+    line_format = '%08x %s'
+  for address, filename in starting_list:
+    print(line_format % (address, filename))
+  return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/chromium/tools/find_runtime_symbols/static_symbols.py b/chromium/tools/find_runtime_symbols/static_symbols.py
new file mode 100644
index 00000000000..cd57bacd99a
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/static_symbols.py
@@ -0,0 +1,277 @@
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import bisect
+import re
+
+
+_ARGUMENT_TYPE_PATTERN = re.compile('\([^()]*\)(\s*const)?')
+_TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>')
+_LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)')
+_READELF_SECTION_HEADER_PATTER = re.compile(
+ '^\s*\[\s*(Nr|\d+)\]\s+(|\S+)\s+([A-Z_]+)\s+([0-9a-f]+)\s+'
+ '([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9]+)\s+([WAXMSILGxOop]*)\s+'
+ '([0-9]+)\s+([0-9]+)\s+([0-9]+)')
+
+
+class ParsingException(Exception):
+  """Raised when the output of an external tool cannot be parsed."""
+
+  def __str__(self):
+    # An exception raised without arguments has an empty args tuple; indexing
+    # it would turn str() into an IndexError.
+    if not self.args:
+      return ''
+    return repr(self.args[0])
+
+
+class AddressMapping(object):
+  """Maps exact start addresses to entries."""
+
+  def __init__(self):
+    self._symbol_map = {}
+
+  def append(self, start, entry):
+    """Registers |entry| at |start|, replacing any entry already there."""
+    self._symbol_map[start] = entry
+
+  def find(self, address):
+    """Returns the entry registered exactly at |address|, or None."""
+    return self._symbol_map.get(address)
+
+
+class RangeAddressMapping(AddressMapping):
+  """Maps address ranges to entries.
+
+  Each entry covers the addresses from its own start up to, but not
+  including, the next registered start. The entry with the highest start is
+  open-ended.
+  """
+
+  def __init__(self):
+    super(RangeAddressMapping, self).__init__()
+    self._sorted_start_list = []
+    self._is_sorted = True
+
+  def append(self, start, entry):
+    """Registers |entry| as beginning at |start|.
+
+    An append whose start equals the start of the immediately preceding
+    append is ignored, so the first entry for that address is kept.
+    """
+    if self._sorted_start_list:
+      if self._sorted_start_list[-1] > start:
+        # Out-of-order insertion; defer the sort until the next find().
+        self._is_sorted = False
+      elif self._sorted_start_list[-1] == start:
+        return
+    self._sorted_start_list.append(start)
+    self._symbol_map[start] = entry
+
+  def find(self, address):
+    """Returns the entry whose range contains |address|.
+
+    Returns:
+      The entry with the greatest start that is <= |address|, or None when
+      the mapping is empty or |address| precedes every registered start.
+    """
+    if not self._sorted_start_list:
+      return None
+    if not self._is_sorted:
+      self._sorted_start_list.sort()
+      self._is_sorted = True
+    # bisect_right counts the starts that are <= address, so an address equal
+    # to a start resolves to the entry beginning there rather than to the
+    # preceding one.
+    found_index = bisect.bisect_right(self._sorted_start_list, address)
+    if found_index == 0:
+      # The address lies before the first range. Indexing with -1 here would
+      # silently return the last entry.
+      return None
+    return self._symbol_map[self._sorted_start_list[found_index - 1]]
+
+
+class Procedure(object):
+  """A class for a procedure symbol and an address range for the symbol."""
+
+  def __init__(self, start, end, name):
+    self.start = start
+    self.end = end
+    self.name = name
+
+  def __eq__(self, other):
+    """Compares start, end and name with those of |other|."""
+    try:
+      return (self.start == other.start and
+              self.end == other.end and
+              self.name == other.name)
+    except AttributeError:
+      # |other| is not procedure-like; let Python fall back to its default
+      # comparison instead of propagating the AttributeError.
+      return NotImplemented
+
+  def __ne__(self, other):
+    equal = self.__eq__(other)
+    # NotImplemented must be passed through as is, not negated.
+    return equal if equal is NotImplemented else not equal
+
+  def __str__(self):
+    return '%x-%x: %s' % (self.start, self.end, self.name)
+
+
+class ElfSection(object):
+  """A class for an elf section header."""
+
+  # Attributes that take part in equality comparison.
+  _COMPARED_FIELDS = ('number', 'name', 'stype', 'address', 'offset', 'size',
+                      'es', 'flg', 'lk', 'inf', 'al')
+
+  def __init__(
+      self, number, name, stype, address, offset, size, es, flg, lk, inf, al):
+    self.number = number
+    self.name = name
+    self.stype = stype
+    self.address = address
+    self.offset = offset
+    self.size = size
+    self.es = es
+    self.flg = flg
+    self.lk = lk
+    self.inf = inf
+    self.al = al
+
+  def __eq__(self, other):
+    """Compares every header field with the same field of |other|."""
+    try:
+      return all(getattr(self, field) == getattr(other, field)
+                 for field in self._COMPARED_FIELDS)
+    except AttributeError:
+      # |other| is not section-like; let Python fall back to its default
+      # comparison instead of propagating the AttributeError.
+      return NotImplemented
+
+  def __ne__(self, other):
+    equal = self.__eq__(other)
+    # NotImplemented must be passed through as is, not negated.
+    return equal if equal is NotImplemented else not equal
+
+  def __str__(self):
+    return '%x+%x(%x) %s' % (self.address, self.size, self.offset, self.name)
+
+
+class StaticSymbolsInFile(object):
+ """Represents static symbol information in a binary file."""
+
+ def __init__(self, my_name):
+ self.my_name = my_name
+ self._elf_sections = []
+ self._procedures = RangeAddressMapping()
+ self._sourcefiles = RangeAddressMapping()
+ self._typeinfos = AddressMapping()
+
+ def _append_elf_section(self, elf_section):
+ self._elf_sections.append(elf_section)
+
+ def _append_procedure(self, start, procedure):
+ self._procedures.append(start, procedure)
+
+ def _append_sourcefile(self, start, sourcefile):
+ self._sourcefiles.append(start, sourcefile)
+
+ def _append_typeinfo(self, start, typeinfo):
+ self._typeinfos.append(start, typeinfo)
+
+ def _find_symbol_by_runtime_address(self, address, vma, target):
+ if not (vma.begin <= address < vma.end):
+ return None
+
+ if vma.name != self.my_name:
+ return None
+
+ file_offset = address - (vma.begin - vma.offset)
+ elf_address = None
+ for section in self._elf_sections:
+ if section.offset <= file_offset < (section.offset + section.size):
+ elf_address = section.address + file_offset - section.offset
+ if not elf_address:
+ return None
+
+ return target.find(elf_address)
+
+ def find_procedure_by_runtime_address(self, address, vma):
+ return self._find_symbol_by_runtime_address(address, vma, self._procedures)
+
+ def find_sourcefile_by_runtime_address(self, address, vma):
+ return self._find_symbol_by_runtime_address(address, vma, self._sourcefiles)
+
+ def find_typeinfo_by_runtime_address(self, address, vma):
+ return self._find_symbol_by_runtime_address(address, vma, self._typeinfos)
+
+ def load_readelf_ew(self, f):
+ found_header = False
+ for line in f:
+ if line.rstrip() == 'Section Headers:':
+ found_header = True
+ break
+ if not found_header:
+ return None
+
+ for line in f:
+ line = line.rstrip()
+ matched = _READELF_SECTION_HEADER_PATTER.match(line)
+ if matched:
+ self._append_elf_section(ElfSection(
+ int(matched.group(1), 10), # number
+ matched.group(2), # name
+ matched.group(3), # stype
+ int(matched.group(4), 16), # address
+ int(matched.group(5), 16), # offset
+ int(matched.group(6), 16), # size
+ matched.group(7), # es
+ matched.group(8), # flg
+ matched.group(9), # lk
+ matched.group(10), # inf
+ matched.group(11) # al
+ ))
+ else:
+ if line in ('Key to Flags:', 'Program Headers:'):
+ break
+
+ def load_readelf_debug_decodedline_file(self, input_file):
+ for line in input_file:
+ splitted = line.rstrip().split(None, 2)
+ self._append_sourcefile(int(splitted[0], 16), splitted[1])
+
+ @staticmethod
+ def _parse_nm_bsd_line(line):
+ if line[8] == ' ':
+ return line[0:8], line[9], line[11:]
+ elif line[16] == ' ':
+ return line[0:16], line[17], line[19:]
+ raise ParsingException('Invalid nm output.')
+
+ @staticmethod
+ def _get_short_function_name(function):
+ while True:
+ function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)
+ if not number:
+ break
+ while True:
+ function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)
+ if not number:
+ break
+ return _LEADING_TYPE_PATTERN.sub('\g<1>', function)
+
+ def load_nm_bsd(self, f, mangled=False):
+ last_start = 0
+ routine = ''
+
+ for line in f:
+ line = line.rstrip()
+ sym_value, sym_type, sym_name = self._parse_nm_bsd_line(line)
+
+ if sym_value[0] == ' ':
+ continue
+
+ start_val = int(sym_value, 16)
+
+ if (sym_type in ('r', 'R', 'D', 'U', 'd', 'V') and
+ (not mangled and sym_name.startswith('typeinfo'))):
+ self._append_typeinfo(start_val, sym_name)
+
+ # It's possible for two symbols to share the same address, if
+ # one is a zero-length variable (like __start_google_malloc) or
+ # one symbol is a weak alias to another (like __libc_malloc).
+ # In such cases, we want to ignore all values except for the
+ # actual symbol, which in nm-speak has type "T". The logic
+ # below does this, though it's a bit tricky: what happens when
+ # we have a series of lines with the same address, is the first
+ # one gets queued up to be processed. However, it won't
+ # *actually* be processed until later, when we read a line with
+ # a different address. That means that as long as we're reading
+ # lines with the same address, we have a chance to replace that
+ # item in the queue, which we do whenever we see a 'T' entry --
+ # that is, a line with type 'T'. If we never see a 'T' entry,
+ # we'll just go ahead and process the first entry (which never
+ # got touched in the queue), and ignore the others.
+ if start_val == last_start and (sym_type == 't' or sym_type == 'T'):
+ # We are the 'T' symbol at this address, replace previous symbol.
+ routine = sym_name
+ continue
+ elif start_val == last_start:
+ # We're not the 'T' symbol at this address, so ignore us.
+ continue
+
+ # Tag this routine with the starting address in case the image
+ # has multiple occurrences of this routine. We use a syntax
+ # that resembles template parameters, which are automatically
+ # stripped out by _get_short_function_name().
+ sym_name += "<%016x>" % start_val
+
+ if not mangled:
+ routine = self._get_short_function_name(routine)
+ self._append_procedure(
+ last_start, Procedure(last_start, start_val, routine))
+
+ last_start = start_val
+ routine = sym_name
+
+ if not mangled:
+ routine = self._get_short_function_name(routine)
+ self._append_procedure(
+ last_start, Procedure(last_start, last_start, routine))