Initial import.

author: Zeno Albisser <zeno.albisser@digia.com> 2013-08-15 21:46:11 +0200
committer: Zeno Albisser <zeno.albisser@digia.com> 2013-08-15 21:46:11 +0200
commit: 679147eead574d186ebf3069647b4c23e8ccace6 (patch)
tree: fc247a0ac8ff119f7c8550879ebb6d3dd8d1ff69 /chromium/tools/find_runtime_symbols
download: qtwebengine-chromium-679147eead574d186ebf3069647b4c23e8ccace6.tar.gz
8 files changed, 973 insertions, 0 deletions
diff --git a/chromium/tools/find_runtime_symbols/OWNERS b/chromium/tools/find_runtime_symbols/OWNERS
new file mode 100644
index 00000000000..aeea00ec3e0
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/OWNERS
@@ -0,0 +1 @@
+dmikurube@chromium.org
diff --git a/chromium/tools/find_runtime_symbols/PRESUBMIT.py b/chromium/tools/find_runtime_symbols/PRESUBMIT.py
new file mode 100644
index 00000000000..8d6889ce3f9
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/PRESUBMIT.py
@@ -0,0 +1,45 @@
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Top-level presubmit script for find_runtime_symbols.
+
+See http://dev.chromium.org/developers/how-tos/depottools/presubmit-scripts for
+details on the presubmit API built into gcl.
+"""
+
+
+def CommonChecks(input_api, output_api):
+  import sys
+  def join(*args):
+    return input_api.os_path.join(input_api.PresubmitLocalPath(), *args)
+
+  output = []
+  sys_path_backup = sys.path
+  try:
+    sys.path = [
+      join('..', 'find_runtime_symbols'),
+    ] + sys.path
+    output.extend(input_api.canned_checks.RunPylint(input_api, output_api))
+  finally:
+    sys.path = sys_path_backup
+
+  output.extend(
+      input_api.canned_checks.RunUnitTestsInDirectory(
+          input_api, output_api,
+          input_api.os_path.join(input_api.PresubmitLocalPath(), 'tests'),
+          whitelist=[r'.+_test\.py$']))
+
+  if input_api.is_committing:
+    output.extend(input_api.canned_checks.PanProjectChecks(input_api,
+                                                           output_api,
+                                                           owners_check=False))
+  return output
+
+
+def CheckChangeOnUpload(input_api, output_api):
+  return CommonChecks(input_api, output_api)
+
+
+def CheckChangeOnCommit(input_api, output_api):
+  return CommonChecks(input_api, output_api)
diff --git a/chromium/tools/find_runtime_symbols/README b/chromium/tools/find_runtime_symbols/README
new file mode 100644
index 00000000000..ee5c2ac88ca
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/README
@@ -0,0 +1,24 @@
+This script maps runtime addresses to symbol names.  It is robust against
+Address Space Layout Randomization (ASLR) since it uses runtime addresses with
+runtime mapping information (/proc/.../maps).
+It works like 'pprof --symbols' in gperftools <http://code.google.com/p/gperftools/>.
+
+
+Step 1: Prepare symbol information.
+
+It is required to collect symbol information before mapping runtime addresses
+to symbol names.
+
+./prepare_symbol_info.py /path/to/maps [/another/path/to/symbol_info_dir]
+
+The required 'maps' file is /proc/.../maps of the process at runtime.
+
+
+Step 2: Find symbols.
+
+./find_runtime_symbols.py /path/to/symbol_info_dir < addresses.txt
+
+'symbol_info_dir' is the directory produced by Step 1.
+The stdin should be a list of hex addresses to map, one per line.
+
+The results will be printed to stdout like 'pprof --symbols'.
diff --git a/chromium/tools/find_runtime_symbols/find_runtime_symbols.py b/chromium/tools/find_runtime_symbols/find_runtime_symbols.py
new file mode 100755
index 00000000000..bed9e800b17
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/find_runtime_symbols.py
@@ -0,0 +1,207 @@
+#!/usr/bin/env python
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Find symbols in a binary corresponding to given runtime virtual addresses.
+
+Note that source file names are treated as symbols in this script while they
+are actually not.
+"""
+
+import json
+import logging
+import os
+import sys
+
+from static_symbols import StaticSymbolsInFile
+from proc_maps import ProcMaps
+
+try:
+  from collections import OrderedDict  # pylint: disable=E0611
+except ImportError:
+  BASE_PATH = os.path.dirname(os.path.abspath(__file__))
+  SIMPLEJSON_PATH = os.path.join(BASE_PATH, os.pardir, os.pardir, 'third_party')
+  sys.path.insert(0, SIMPLEJSON_PATH)
+  from simplejson import OrderedDict
+
+
+FUNCTION_SYMBOLS = 0
+SOURCEFILE_SYMBOLS = 1
+TYPEINFO_SYMBOLS = 2
+
+_MAPS_FILENAME = 'maps'
+_FILES_FILENAME = 'files.json'
+
+
+class RuntimeSymbolsInProcess(object):
+  def __init__(self):
+    self._maps = None
+    self._static_symbols_in_filse = {}
+
+  def find_procedure(self, runtime_address):
+    for vma in self._maps.iter(ProcMaps.executable):
+      if vma.begin <= runtime_address < vma.end:
+        static_symbols = self._static_symbols_in_filse.get(vma.name)
+        if static_symbols:
+          return static_symbols.find_procedure_by_runtime_address(
+              runtime_address, vma)
+        else:
+          return None
+    return None
+
+  def find_sourcefile(self, runtime_address):
+    for vma in self._maps.iter(ProcMaps.executable):
+      if vma.begin <= runtime_address < vma.end:
+        static_symbols = self._static_symbols_in_filse.get(vma.name)
+        if static_symbols:
+          return static_symbols.find_sourcefile_by_runtime_address(
+              runtime_address, vma)
+        else:
+          return None
+    return None
+
+  def find_typeinfo(self, runtime_address):
+    for vma in self._maps.iter(ProcMaps.constants):
+      if vma.begin <= runtime_address < vma.end:
+        static_symbols = self._static_symbols_in_filse.get(vma.name)
+        if static_symbols:
+          return static_symbols.find_typeinfo_by_runtime_address(
+              runtime_address, vma)
+        else:
+          return None
+    return None
+
+  @staticmethod
+  def load(prepared_data_dir):
+    symbols_in_process = RuntimeSymbolsInProcess()
+
+    with open(os.path.join(prepared_data_dir, _MAPS_FILENAME), mode='r') as f:
+      symbols_in_process._maps = ProcMaps.load(f)
+    with open(os.path.join(prepared_data_dir, _FILES_FILENAME), mode='r') as f:
+      files = json.load(f)
+
+    # pylint: disable=W0212
+    for vma in symbols_in_process._maps.iter(ProcMaps.executable_and_constants):
+      file_entry = files.get(vma.name)
+      if not file_entry:
+        continue
+
+      static_symbols = StaticSymbolsInFile(vma.name)
+
+      nm_entry = file_entry.get('nm')
+      if nm_entry and nm_entry['format'] == 'bsd':
+        with open(os.path.join(prepared_data_dir, nm_entry['file']), 'r') as f:
+          static_symbols.load_nm_bsd(f, nm_entry['mangled'])
+
+      readelf_entry = file_entry.get('readelf-e')
+      if readelf_entry:
+        with open(os.path.join(prepared_data_dir, readelf_entry['file']),
+                  'r') as f:
+          static_symbols.load_readelf_ew(f)
+
+      decodedline_file_entry = file_entry.get('readelf-debug-decodedline-file')
+      if decodedline_file_entry:
+        with open(os.path.join(prepared_data_dir,
+                               decodedline_file_entry['file']), 'r') as f:
+          static_symbols.load_readelf_debug_decodedline_file(f)
+
+      symbols_in_process._static_symbols_in_filse[vma.name] = static_symbols
+
+    return symbols_in_process
+
+
+def _find_runtime_function_symbols(symbols_in_process, addresses):
+  result = OrderedDict()
+  for address in addresses:
+    if isinstance(address, basestring):
+      address = int(address, 16)
+    found = symbols_in_process.find_procedure(address)
+    if found:
+      result[address] = found.name
+    else:
+      result[address] = '0x%016x' % address
+  return result
+
+
+def _find_runtime_sourcefile_symbols(symbols_in_process, addresses):
+  result = OrderedDict()
+  for address in addresses:
+    if isinstance(address, basestring):
+      address = int(address, 16)
+    found = symbols_in_process.find_sourcefile(address)
+    if found:
+      result[address] = found
+    else:
+      result[address] = ''
+  return result
+
+
+def _find_runtime_typeinfo_symbols(symbols_in_process, addresses):
+  result = OrderedDict()
+  for address in addresses:
+    if isinstance(address, basestring):
+      address = int(address, 16)
+    if address == 0:
+      result[address] = 'no typeinfo'
+    else:
+      found = symbols_in_process.find_typeinfo(address)
+      if found:
+        if found.startswith('typeinfo for '):
+          result[address] = found[13:]
+        else:
+          result[address] = found
+      else:
+        result[address] = '0x%016x' % address
+  return result
+
+
+_INTERNAL_FINDERS = {
+    FUNCTION_SYMBOLS: _find_runtime_function_symbols,
+    SOURCEFILE_SYMBOLS: _find_runtime_sourcefile_symbols,
+    TYPEINFO_SYMBOLS: _find_runtime_typeinfo_symbols,
+    }
+
+
+def find_runtime_symbols(symbol_type, symbols_in_process, addresses):
+  return _INTERNAL_FINDERS[symbol_type](symbols_in_process, addresses)
+
+
+def main():
+  """Prints the function symbol for each hex address read from stdin."""
+  # FIX: Accept only .pre data
+  if len(sys.argv) < 2:
+    sys.stderr.write("""Usage:
+%s /path/to/prepared_data_dir/ < addresses.txt
+""" % sys.argv[0])
+    return 1
+
+  log = logging.getLogger('find_runtime_symbols')
+  log.setLevel(logging.WARN)
+  handler = logging.StreamHandler()
+  handler.setLevel(logging.WARN)
+  handler.setFormatter(logging.Formatter('%(message)s'))
+  log.addHandler(handler)
+
+  prepared_data_dir = sys.argv[1]
+  if not os.path.exists(prepared_data_dir):
+    log.warn("Nothing found: %s" % prepared_data_dir)
+    return 1
+  if not os.path.isdir(prepared_data_dir):
+    log.warn("Not a directory: %s" % prepared_data_dir)
+    return 1
+
+  symbols_in_process = RuntimeSymbolsInProcess.load(prepared_data_dir)
+  symbols_dict = find_runtime_symbols(
+      FUNCTION_SYMBOLS, symbols_in_process, sys.stdin)
+  # Iterating the dict itself yields bare keys; iteritems() yields the pairs.
+  for address, symbol in symbols_dict.iteritems():
+    if symbol:
+      print '%016x %s' % (address, symbol)
+    else:
+      print '%016x' % address
+
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/chromium/tools/find_runtime_symbols/prepare_symbol_info.py b/chromium/tools/find_runtime_symbols/prepare_symbol_info.py
new file mode 100755
index 00000000000..d5503881a1c
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/prepare_symbol_info.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env python
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import hashlib
+import json
+import logging
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+
+from proc_maps import ProcMaps
+
+
+BASE_PATH = os.path.dirname(os.path.abspath(__file__))
+REDUCE_DEBUGLINE_PATH = os.path.join(BASE_PATH, 'reduce_debugline.py')
+LOGGER = logging.getLogger('prepare_symbol_info')
+
+
+def _dump_command_result(command, output_dir_path, basename, suffix):
+  handle_out, filename_out = tempfile.mkstemp(
+      suffix=suffix, prefix=basename + '.', dir=output_dir_path)
+  handle_err, filename_err = tempfile.mkstemp(
+      suffix=suffix + '.err', prefix=basename + '.', dir=output_dir_path)
+  error = False
+  try:
+    subprocess.check_call(
+        command, stdout=handle_out, stderr=handle_err, shell=True)
+  except (OSError, subprocess.CalledProcessError):
+    error = True
+  finally:
+    os.close(handle_err)
+    os.close(handle_out)
+
+  if os.path.exists(filename_err):
+    if LOGGER.getEffectiveLevel() <= logging.DEBUG:
+      with open(filename_err, 'r') as f:
+        for line in f:
+          LOGGER.debug(line.rstrip())
+    os.remove(filename_err)
+
+  if os.path.exists(filename_out) and (
+      os.path.getsize(filename_out) == 0 or error):
+    os.remove(filename_out)
+    return None
+
+  if not os.path.exists(filename_out):
+    return None
+
+  return filename_out
+
+
+def prepare_symbol_info(maps_path,
+                        output_dir_path=None,
+                        alternative_dirs=None,
+                        use_tempdir=False,
+                        use_source_file_name=False):
+  """Prepares (collects) symbol information files for find_runtime_symbols.
+
+  1) If |output_dir_path| is specified, it tries collecting symbol information
+  files in the given directory |output_dir_path|.
+  1-a) If |output_dir_path| doesn't exist, create the directory and use it.
+  1-b) If |output_dir_path| is an empty directory, use it.
+  1-c) If |output_dir_path| is a directory which has 'files.json', assumes that
+       files are already collected and just ignores it.
+  1-d) Otherwise, depends on |use_tempdir|.
+
+  2) If |output_dir_path| is not specified, it tries to create a new directory
+  depending on 'maps_path'.
+
+  If it cannot create a new directory, creates a temporary directory depending
+  on |use_tempdir|.  If |use_tempdir| is False, returns None.
+
+  Args:
+      maps_path: A path to a file which contains '/proc/<pid>/maps'.
+      alternative_dirs: A mapping from a directory '/path/on/target' where the
+          target process runs to a directory '/path/on/host' where the script
+          reads the binary.  Considered to be used for Android binaries.
+      output_dir_path: A path to a directory where files are prepared.
+      use_tempdir: If True, it creates a temporary directory when it cannot
+          create a new directory.
+      use_source_file_name: If True, it adds reduced result of 'readelf -wL'
+          to find source file names.
+
+  Returns:
+      A pair of a path to the prepared directory and a boolean representing
+      if it created a temporary directory or not.
+  """
+  alternative_dirs = alternative_dirs or {}
+  if not output_dir_path:
+    matched = re.match('^(.*)\.maps$', os.path.basename(maps_path))
+    if matched:
+      output_dir_path = matched.group(1) + '.pre'
+  if not output_dir_path:
+    matched = re.match('^/proc/(.*)/maps$', os.path.realpath(maps_path))
+    if matched:
+      output_dir_path = matched.group(1) + '.pre'
+  if not output_dir_path:
+    output_dir_path = os.path.basename(maps_path) + '.pre'
+  # TODO(dmikurube): Find another candidate for output_dir_path.
+
+  used_tempdir = False
+  LOGGER.info('Data for profiling will be collected in "%s".' % output_dir_path)
+  if os.path.exists(output_dir_path):
+    if os.path.isdir(output_dir_path) and not os.listdir(output_dir_path):
+      LOGGER.warn('Using an empty existing directory "%s".' % output_dir_path)
+    else:
+      LOGGER.warn('A file or a directory exists at "%s".' % output_dir_path)
+      if os.path.exists(os.path.join(output_dir_path, 'files.json')):
+        LOGGER.warn('Using the existing directory "%s".' % output_dir_path)
+        return output_dir_path, used_tempdir
+      else:
+        if use_tempdir:
+          output_dir_path = tempfile.mkdtemp()
+          used_tempdir = True
+          LOGGER.warn('Using a temporary directory "%s".' % output_dir_path)
+        else:
+          LOGGER.warn('The directory "%s" is not available.' % output_dir_path)
+          return None, used_tempdir
+  else:
+    LOGGER.info('Creating a new directory "%s".' % output_dir_path)
+    try:
+      os.mkdir(output_dir_path)
+    except OSError:
+      LOGGER.warn('A directory "%s" cannot be created.' % output_dir_path)
+      if use_tempdir:
+        output_dir_path = tempfile.mkdtemp()
+        used_tempdir = True
+        LOGGER.warn('Using a temporary directory "%s".' % output_dir_path)
+      else:
+        LOGGER.warn('The directory "%s" is not available.' % output_dir_path)
+        return None, used_tempdir
+
+  shutil.copyfile(maps_path, os.path.join(output_dir_path, 'maps'))
+
+  with open(maps_path, mode='r') as f:
+    maps = ProcMaps.load(f)
+
+  LOGGER.debug('Listing up symbols.')
+  files = {}
+  for entry in maps.iter(ProcMaps.executable):
+    LOGGER.debug('  %016x-%016x +%06x %s' % (
+        entry.begin, entry.end, entry.offset, entry.name))
+    binary_path = entry.name
+    for target_path, host_path in alternative_dirs.iteritems():
+      if entry.name.startswith(target_path):
+        binary_path = entry.name.replace(target_path, host_path, 1)
+    nm_filename = _dump_command_result(
+        'nm -n --format bsd %s | c++filt' % binary_path,
+        output_dir_path, os.path.basename(binary_path), '.nm')
+    if not nm_filename:
+      continue
+    readelf_e_filename = _dump_command_result(
+        'readelf -eW %s' % binary_path,
+        output_dir_path, os.path.basename(binary_path), '.readelf-e')
+    if not readelf_e_filename:
+      continue
+    readelf_debug_decodedline_file = None
+    if use_source_file_name:
+      readelf_debug_decodedline_file = _dump_command_result(
+          'readelf -wL %s | %s' % (binary_path, REDUCE_DEBUGLINE_PATH),
+          output_dir_path, os.path.basename(binary_path), '.readelf-wL')
+
+    files[entry.name] = {}
+    files[entry.name]['nm'] = {
+        'file': os.path.basename(nm_filename),
+        'format': 'bsd',
+        'mangled': False}
+    files[entry.name]['readelf-e'] = {
+        'file': os.path.basename(readelf_e_filename)}
+    if readelf_debug_decodedline_file:
+      files[entry.name]['readelf-debug-decodedline-file'] = {
+          'file': os.path.basename(readelf_debug_decodedline_file)}
+
+    files[entry.name]['size'] = os.stat(binary_path).st_size
+
+    with open(binary_path, 'rb') as entry_f:
+      md5 = hashlib.md5()
+      sha1 = hashlib.sha1()
+      chunk = entry_f.read(1024 * 1024)
+      while chunk:
+        md5.update(chunk)
+        sha1.update(chunk)
+        chunk = entry_f.read(1024 * 1024)
+      files[entry.name]['sha1'] = sha1.hexdigest()
+      files[entry.name]['md5'] = md5.hexdigest()
+
+  with open(os.path.join(output_dir_path, 'files.json'), 'w') as f:
+    json.dump(files, f, indent=2, sort_keys=True)
+
+  LOGGER.info('Collected symbol information at "%s".' % output_dir_path)
+  return output_dir_path, used_tempdir
+
+
+def main():
+  """Collects symbol information for the maps file named in argv[1].
+
+  Returns 0 if a directory was prepared, or 1 otherwise.
+  """
+  if not sys.platform.startswith('linux'):
+    sys.stderr.write('This script works only on Linux.\n')
+    return 1
+
+  LOGGER.setLevel(logging.DEBUG)
+  handler = logging.StreamHandler()
+  handler.setLevel(logging.INFO)
+  handler.setFormatter(logging.Formatter('%(message)s'))
+  LOGGER.addHandler(handler)
+
+  # TODO(dmikurube): Specify |alternative_dirs| from command line.
+  if len(sys.argv) < 2:
+    sys.stderr.write("""Usage:
+%s /path/to/maps [/path/to/output_data_dir/]
+""" % sys.argv[0])
+    return 1
+  # argv[2], when given, is the output directory; later arguments are ignored.
+  result, _ = prepare_symbol_info(*sys.argv[1:3])
+  return 0 if result else 1
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/chromium/tools/find_runtime_symbols/proc_maps.py b/chromium/tools/find_runtime_symbols/proc_maps.py
new file mode 100644
index 00000000000..2d917b32124
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/proc_maps.py
@@ -0,0 +1,125 @@
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import re
+
+
+_MAPS_PATTERN = re.compile(
+    r'^([a-f0-9]+)-([a-f0-9]+)\s+(.)(.)(.)(.)\s+([a-f0-9]+)\s+(\S+):(\S+)\s+'
+    r'(\d+)\s*(.*)$', re.IGNORECASE)
+
+
+class ProcMapsEntry(object):
+  """A class representing one line in /proc/.../maps."""
+
+  def __init__(
+      self, begin, end, readable, writable, executable, private, offset,
+      major, minor, inode, name):
+    self.begin = begin
+    self.end = end
+    self.readable = readable
+    self.writable = writable
+    self.executable = executable
+    self.private = private
+    self.offset = offset
+    self.major = major
+    self.minor = minor
+    self.inode = inode
+    self.name = name
+
+  def as_dict(self):
+    return {
+        'begin': self.begin,
+        'end': self.end,
+        'readable': self.readable,
+        'writable': self.writable,
+        'executable': self.executable,
+        'private': self.private,
+        'offset': self.offset,
+        'major': self.major,
+        'minor': self.minor,
+        'inode': self.inode,
+        'name': self.name,
+    }
+
+
+class ProcMaps(object):
+  """A class representing contents in /proc/.../maps."""
+
+  def __init__(self):
+    self._sorted_indexes = []
+    self._dictionary = {}
+    self._sorted = True
+
+  def iter(self, condition):
+    if not self._sorted:
+      self._sorted_indexes.sort()
+      self._sorted = True
+    for index in self._sorted_indexes:
+      if not condition or condition(self._dictionary[index]):
+        yield self._dictionary[index]
+
+  def __iter__(self):
+    if not self._sorted:
+      self._sorted_indexes.sort()
+      self._sorted = True
+    for index in self._sorted_indexes:
+      yield self._dictionary[index]
+
+  @staticmethod
+  def load(f):
+    table = ProcMaps()
+    for line in f:
+      table.append_line(line)
+    return table
+
+  def append_line(self, line):
+    entry = self.parse_line(line)
+    if entry:
+      self._append_entry(entry)
+
+  @staticmethod
+  def parse_line(line):
+    matched = _MAPS_PATTERN.match(line)
+    if matched:
+      return ProcMapsEntry(  # pylint: disable=W0212
+          int(matched.group(1), 16),  # begin
+          int(matched.group(2), 16),  # end
+          matched.group(3),           # readable
+          matched.group(4),           # writable
+          matched.group(5),           # executable
+          matched.group(6),           # private
+          int(matched.group(7), 16),  # offset
+          matched.group(8),           # major
+          matched.group(9),           # minor
+          int(matched.group(10), 10), # inode
+          matched.group(11)           # name
+          )
+    else:
+      return None
+
+  @staticmethod
+  def constants(entry):
+    return (entry.writable == '-' and entry.executable == '-' and re.match(
+        '\S+(\.(so|dll|dylib|bundle)|chrome)((\.\d+)+\w*(\.\d+){0,3})?',
+        entry.name))
+
+  @staticmethod
+  def executable(entry):
+    return (entry.executable == 'x' and re.match(
+        '\S+(\.(so|dll|dylib|bundle)|chrome)((\.\d+)+\w*(\.\d+){0,3})?',
+        entry.name))
+
+  @staticmethod
+  def executable_and_constants(entry):
+    return (((entry.writable == '-' and entry.executable == '-') or
+             entry.executable == 'x') and re.match(
+        '\S+(\.(so|dll|dylib|bundle)|chrome)((\.\d+)+\w*(\.\d+){0,3})?',
+        entry.name))
+
+  def _append_entry(self, entry):
+    if self._sorted_indexes and self._sorted_indexes[-1] > entry.begin:
+      self._sorted = False
+    self._sorted_indexes.append(entry.begin)
+    self._dictionary[entry.begin] = entry
diff --git a/chromium/tools/find_runtime_symbols/reduce_debugline.py b/chromium/tools/find_runtime_symbols/reduce_debugline.py
new file mode 100755
index 00000000000..75c8c8578d7
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/reduce_debugline.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+# Copyright (c) 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Reduces result of 'readelf -wL' to just a list of starting addresses.
+
+It lists all addresses where the corresponding source file changes.  The
+list is sorted in ascending order.  See tests/reduce_debugline_test.py for
+examples.
+
+This script assumes that the result of 'readelf -wL' ends with an empty line.
+
+Note: the option '-wL' has the same meaning as '--debug-dump=decodedline'.
+"""
+
+import re
+import sys
+
+
+_FILENAME_PATTERN = re.compile('(CU: |)(.+)\:')
+
+
+def reduce_decoded_debugline(input_file):
+  filename = ''
+  starting_dict = {}
+  started = False
+
+  for line in input_file:
+    line = line.strip()
+    unpacked = line.split(None, 2)
+
+    if len(unpacked) == 3 and unpacked[2].startswith('0x'):
+      if not started and filename:
+        started = True
+        starting_dict[int(unpacked[2], 16)] = filename
+    else:
+      started = False
+      if line.endswith(':'):
+        matched = _FILENAME_PATTERN.match(line)
+        if matched:
+          filename = matched.group(2)
+
+  starting_list = []
+  prev_filename = ''
+  for address in sorted(starting_dict):
+    curr_filename = starting_dict[address]
+    if prev_filename != curr_filename:
+      starting_list.append((address, starting_dict[address]))
+    prev_filename = curr_filename
+  return starting_list
+
+
+def main():
+  """Prints 'address filename' lines reduced from 'readelf -wL' on stdin."""
+  if len(sys.argv) != 1:
+    print >> sys.stderr, 'Unsupported arguments'
+    return 1
+
+  starting_list = reduce_decoded_debugline(sys.stdin)
+  # An empty list (no line info in the input) must not be indexed with [-1].
+  bits64 = bool(starting_list) and starting_list[-1][0] > 0xffffffff
+  line_format = '%016x %s' if bits64 else '%08x %s'
+  for address, filename in starting_list:
+    print line_format % (address, filename)
+  return 0
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/chromium/tools/find_runtime_symbols/static_symbols.py b/chromium/tools/find_runtime_symbols/static_symbols.py
new file mode 100644
index 00000000000..cd57bacd99a
--- /dev/null
+++ b/chromium/tools/find_runtime_symbols/static_symbols.py
@@ -0,0 +1,277 @@
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import bisect
+import re
+
+
+_ARGUMENT_TYPE_PATTERN = re.compile('\([^()]*\)(\s*const)?')
+_TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>')
+_LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)')
+_READELF_SECTION_HEADER_PATTER = re.compile(
+    '^\s*\[\s*(Nr|\d+)\]\s+(|\S+)\s+([A-Z_]+)\s+([0-9a-f]+)\s+'
+    '([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9]+)\s+([WAXMSILGxOop]*)\s+'
+    '([0-9]+)\s+([0-9]+)\s+([0-9]+)')
+
+
+class ParsingException(Exception):
+  def __str__(self):
+    return repr(self.args[0])
+
+
+class AddressMapping(object):
+  def __init__(self):
+    self._symbol_map = {}
+
+  def append(self, start, entry):
+    self._symbol_map[start] = entry
+
+  def find(self, address):
+    return self._symbol_map.get(address)
+
+
+class RangeAddressMapping(AddressMapping):
+  """Maps an address to the entry with the nearest start address below it."""
+  def __init__(self):
+    super(RangeAddressMapping, self).__init__()
+    self._sorted_start_list = []
+    self._is_sorted = True
+
+  def append(self, start, entry):
+    if self._sorted_start_list:
+      if self._sorted_start_list[-1] > start:
+        self._is_sorted = False
+      elif self._sorted_start_list[-1] == start:
+        return
+    self._sorted_start_list.append(start)
+    self._symbol_map[start] = entry
+
+  def find(self, address):
+    if not self._is_sorted:
+      self._sorted_start_list.sort()
+      self._is_sorted = True
+    found_index = bisect.bisect_left(self._sorted_start_list, address)
+    if found_index == 0:
+      return None  # Nothing starts below |address|; don't wrap to the last.
+    return self._symbol_map[self._sorted_start_list[found_index - 1]]
+
+
+class Procedure(object):
+  """A class for a procedure symbol and an address range for the symbol."""
+
+  def __init__(self, start, end, name):
+    self.start = start
+    self.end = end
+    self.name = name
+
+  def __eq__(self, other):
+    return (self.start == other.start and
+            self.end == other.end and
+            self.name == other.name)
+
+  def __ne__(self, other):
+    return not self.__eq__(other)
+
+  def __str__(self):
+    return '%x-%x: %s' % (self.start, self.end, self.name)
+
+
+class ElfSection(object):
+  """A class for an elf section header."""
+
+  def __init__(
+      self, number, name, stype, address, offset, size, es, flg, lk, inf, al):
+    self.number = number
+    self.name = name
+    self.stype = stype
+    self.address = address
+    self.offset = offset
+    self.size = size
+    self.es = es
+    self.flg = flg
+    self.lk = lk
+    self.inf = inf
+    self.al = al
+
+  def __eq__(self, other):
+    return (self.number == other.number and
+            self.name == other.name and
+            self.stype == other.stype and
+            self.address == other.address and
+            self.offset == other.offset and
+            self.size == other.size and
+            self.es == other.es and
+            self.flg == other.flg and
+            self.lk == other.lk and
+            self.inf == other.inf and
+            self.al == other.al)
+
+  def __ne__(self, other):
+    return not self.__eq__(other)
+
+  def __str__(self):
+    return '%x+%x(%x) %s' % (self.address, self.size, self.offset, self.name)
+
+
+class StaticSymbolsInFile(object):
+  """Represents static symbol information in a binary file."""
+
+  def __init__(self, my_name):
+    self.my_name = my_name
+    self._elf_sections = []
+    self._procedures = RangeAddressMapping()
+    self._sourcefiles = RangeAddressMapping()
+    self._typeinfos = AddressMapping()
+
+  def _append_elf_section(self, elf_section):
+    self._elf_sections.append(elf_section)
+
+  def _append_procedure(self, start, procedure):
+    self._procedures.append(start, procedure)
+
+  def _append_sourcefile(self, start, sourcefile):
+    self._sourcefiles.append(start, sourcefile)
+
+  def _append_typeinfo(self, start, typeinfo):
+    self._typeinfos.append(start, typeinfo)
+
+  def _find_symbol_by_runtime_address(self, address, vma, target):
+    if not (vma.begin <= address < vma.end):
+      return None
+
+    if vma.name != self.my_name:
+      return None
+
+    file_offset = address - (vma.begin - vma.offset)
+    elf_address = None
+    for section in self._elf_sections:
+      if section.offset <= file_offset < (section.offset + section.size):
+        elf_address = section.address + file_offset - section.offset
+    if not elf_address:
+      return None
+
+    return target.find(elf_address)
+
+  def find_procedure_by_runtime_address(self, address, vma):
+    return self._find_symbol_by_runtime_address(address, vma, self._procedures)
+
+  def find_sourcefile_by_runtime_address(self, address, vma):
+    return self._find_symbol_by_runtime_address(address, vma, self._sourcefiles)
+
+  def find_typeinfo_by_runtime_address(self, address, vma):
+    return self._find_symbol_by_runtime_address(address, vma, self._typeinfos)
+
+  def load_readelf_ew(self, f):
+    found_header = False
+    for line in f:
+      if line.rstrip() == 'Section Headers:':
+        found_header = True
+        break
+    if not found_header:
+      return None
+
+    for line in f:
+      line = line.rstrip()
+      matched = _READELF_SECTION_HEADER_PATTER.match(line)
+      if matched:
+        self._append_elf_section(ElfSection(
+            int(matched.group(1), 10), # number
+            matched.group(2), # name
+            matched.group(3), # stype
+            int(matched.group(4), 16), # address
+            int(matched.group(5), 16), # offset
+            int(matched.group(6), 16), # size
+            matched.group(7), # es
+            matched.group(8), # flg
+            matched.group(9), # lk
+            matched.group(10), # inf
+            matched.group(11) # al
+            ))
+      else:
+        if line in ('Key to Flags:', 'Program Headers:'):
+          break
+
+  def load_readelf_debug_decodedline_file(self, input_file):
+    for line in input_file:
+      splitted = line.rstrip().split(None, 2)
+      self._append_sourcefile(int(splitted[0], 16), splitted[1])
+
+  @staticmethod
+  def _parse_nm_bsd_line(line):
+    if line[8] == ' ':
+      return line[0:8], line[9], line[11:]
+    elif line[16] == ' ':
+      return line[0:16], line[17], line[19:]
+    raise ParsingException('Invalid nm output.')
+
+  @staticmethod
+  def _get_short_function_name(function):
+    while True:
+      function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)
+      if not number:
+        break
+    while True:
+      function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)
+      if not number:
+        break
+    return _LEADING_TYPE_PATTERN.sub('\g<1>', function)
+
+  def load_nm_bsd(self, f, mangled=False):
+    last_start = 0
+    routine = ''
+
+    for line in f:
+      line = line.rstrip()
+      sym_value, sym_type, sym_name = self._parse_nm_bsd_line(line)
+
+      if sym_value[0] == ' ':
+        continue
+
+      start_val = int(sym_value, 16)
+
+      if (sym_type in ('r', 'R', 'D', 'U', 'd', 'V') and
+          (not mangled and sym_name.startswith('typeinfo'))):
+        self._append_typeinfo(start_val, sym_name)
+
+      # It's possible for two symbols to share the same address, if
+      # one is a zero-length variable (like __start_google_malloc) or
+      # one symbol is a weak alias to another (like __libc_malloc).
+      # In such cases, we want to ignore all values except for the
+      # actual symbol, which in nm-speak has type "T".  The logic
+      # below does this, though it's a bit tricky: what happens when
+      # we have a series of lines with the same address, is the first
+      # one gets queued up to be processed.  However, it won't
+      # *actually* be processed until later, when we read a line with
+      # a different address.  That means that as long as we're reading
+      # lines with the same address, we have a chance to replace that
+      # item in the queue, which we do whenever we see a 'T' entry --
+      # that is, a line with type 'T'.  If we never see a 'T' entry,
+      # we'll just go ahead and process the first entry (which never
+      # got touched in the queue), and ignore the others.
+      if start_val == last_start and (sym_type == 't' or sym_type == 'T'):
+        # We are the 'T' symbol at this address, replace previous symbol.
+        routine = sym_name
+        continue
+      elif start_val == last_start:
+        # We're not the 'T' symbol at this address, so ignore us.
+        continue
+
+      # Tag this routine with the starting address in case the image
+      # has multiple occurrences of this routine.  We use a syntax
+      # that resembles template parameters that are automatically
+      # stripped out by _get_short_function_name()
+      sym_name += "<%016x>" % start_val
+
+      if not mangled:
+        routine = self._get_short_function_name(routine)
+      self._append_procedure(
+          last_start, Procedure(last_start, start_val, routine))
+
+      last_start = start_val
+      routine = sym_name
+
+    if not mangled:
+      routine = self._get_short_function_name(routine)
+    self._append_procedure(
+        last_start, Procedure(last_start, last_start, routine))
author	Zeno Albisser <zeno.albisser@digia.com>	2013-08-15 21:46:11 +0200
committer	Zeno Albisser <zeno.albisser@digia.com>	2013-08-15 21:46:11 +0200
commit	679147eead574d186ebf3069647b4c23e8ccace6 (patch)
tree	fc247a0ac8ff119f7c8550879ebb6d3dd8d1ff69 /chromium/tools/find_runtime_symbols
download	qtwebengine-chromium-679147eead574d186ebf3069647b4c23e8ccace6.tar.gz