#!/usr/bin/env vpython
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Patch an orderfile.

Starting with a list of symbols in a binary and an orderfile (ordered list of
sections), matches the symbols in the orderfile and augments each symbol with
the symbols residing at the same address (due to having identical code). The
output is a list of section or symbols matching rules appropriate for the
linker option -section-ordering-file for gold and --symbol-ordering-file for
lld. Both linkers are fine with extra directives that aren't matched in the
binary, so we construct a file suitable for both, concatenating sections and
symbols. We assume that the unpatched orderfile is built for gold, that is, it
only contains sections.

Note: It is possible to have:
- Several symbols mapping to the same offset in the binary.
- Several offsets for a given symbol (because we strip the ".clone." and other
  suffixes)

The general pipeline is:
1. Get the symbol infos (name, offset, size, section) from the binary
2. Get the symbol names from the orderfile
3. Find the orderfile symbol names in the symbols coming from the binary
4. For each symbol found, get all the symbols at the same address
5. Output them to an updated orderfile suitable for gold and lld
6. Output catch-all section matching rules for unprofiled methods. This is
   ineffective for lld, as it doesn't handle wildcards, but puts unordered
   symbols after the ordered ones.
"""

import argparse
import collections
import functools
import logging
import sys

import cyglog_to_orderfile
import cygprofile_utils
import symbol_extractor

# Prefixes for the symbols. We strip them from the incoming symbols, and add
# them back in the output file.
# Output sections are constructed as prefix + symbol_name, hence the empty
# prefix is used to generate the symbol entry for lld.
_PREFIXES = ('.text.hot.', '.text.unlikely.', '.text.', '')

# Suffixes for the symbols. These are due to method splitting for inlining and
# method cloning for various reasons including constant propagation and
# inter-procedural optimization.
_SUFFIXES = ('.clone.', '.part.', '.isra.', '.constprop.')


def RemoveSuffixes(name):
    """Strips method name suffixes from cloning and splitting.

    .clone. comes from cloning in -O3.
    .part. comes from partial method splitting for inlining.
    .isra. comes from inter-procedural optimizations.
    .constprop. is cloning for constant propagation.

    Args:
      name: a symbol or section name, possibly carrying one of _SUFFIXES.

    Returns:
      The name truncated at the first occurrence of any of _SUFFIXES.
    """
    for suffix in _SUFFIXES:
        # Everything from the suffix marker onwards is dropped.
        name = name.partition(suffix)[0]
    return name


def _UniqueGenerator(generator):
    """Converts a generator to skip yielding elements already seen.

    Example:
      @_UniqueGenerator
      def Foo():
        yield 1
        yield 2
        yield 1
        yield 3

      Foo() yields 1,2,3.

    Args:
      generator: a generator function yielding hashable items.

    Returns:
      A generator function with the same signature, name and docstring that
      yields each distinct item once, in order of first appearance.
    """
    @functools.wraps(generator)
    def _FilteringFunction(*args, **kwargs):
        returned = set()
        for item in generator(*args, **kwargs):
            if item in returned:
                continue
            returned.add(item)
            yield item

    return _FilteringFunction


def _GroupSymbolInfosFromBinary(binary_filename):
    """Group all the symbols from a binary by name and offset.

    Suffixes (see RemoveSuffixes) are stripped from the symbol names before
    grouping.

    Args:
      binary_filename: path to the binary.

    Returns:
      A tuple of dict:
      (offset_to_symbol_infos, name_to_symbol_infos):
      - offset_to_symbol_infos: {offset: [symbol_info1, ...]}
      - name_to_symbol_infos: {name: [symbol_info1, ...]}
    """
    symbol_infos = symbol_extractor.SymbolInfosFromBinary(binary_filename)
    symbol_infos_no_suffixes = [
        s._replace(name=RemoveSuffixes(s.name)) for s in symbol_infos]
    return (
        symbol_extractor.GroupSymbolInfosByOffset(symbol_infos_no_suffixes),
        symbol_extractor.GroupSymbolInfosByName(symbol_infos_no_suffixes))


def _StripPrefix(line):
    """Strips the linker section name prefix from a symbol line.

    Args:
      line: a line from an orderfile, usually in the form:
            .text.SymbolName

    Returns:
      The symbol, SymbolName in the example above.
    """
    # Went away with GCC, make sure it doesn't come back, as the orderfile
    # no longer contains it.
    assert not line.startswith('.text.startup.')
    for prefix in _PREFIXES:
        # The empty prefix would match everything; it is the fallthrough case.
        if prefix and line.startswith(prefix):
            return line[len(prefix):]
    return line  # Unprefixed case


def _SectionNameToSymbols(section_name, section_to_symbols_map):
    """Yields all symbols which could be referred to by section_name.

    If the section name is present in the map, the names in the map are
    returned. Otherwise, any prefixes are stripped from the section name and
    the remainder is returned.

    Args:
      section_name: the section name to resolve.
      section_to_symbols_map: {section name: [symbol name, ...]}.

    Yields:
      Symbol names. Nothing is yielded for an empty name, for '.text', or for
      a name ending in '*'.
    """
    if (not section_name or
            section_name == '.text' or
            section_name.endswith('*')):
        return  # Don't return anything for catch-all sections
    if section_name in section_to_symbols_map:
        for symbol in section_to_symbols_map[section_name]:
            yield symbol
    else:
        name = _StripPrefix(section_name)
        if name:
            yield name


def GetSectionsFromOrderfile(filename):
    """Yields the sections from an orderfile.

    Args:
      filename: The name of the orderfile.

    Yields:
      Section names, one per non-empty line of the file, with the trailing
      newline removed.
    """
    with open(filename, 'r') as f:
        # Iterating the file object works on both Python 2 and Python 3,
        # unlike file.xreadlines() which only exists on Python 2.
        for line in f:
            line = line.rstrip('\n')
            if line:
                yield line


@_UniqueGenerator
def GetSymbolsFromOrderfile(filename, section_to_symbols_map):
    """Yields the symbols from an orderfile. Output elements do not repeat.

    Args:
      filename: The name of the orderfile.
      section_to_symbols_map: The mapping from section to symbol names. If a
                              section name is missing from the mapping, the
                              symbol name is assumed to be the section name
                              with prefixes and suffixes stripped.

    Yields:
      Symbol names, each one at most once.
    """
    # TODO(lizeb,pasko): Move this method to symbol_extractor.py
    for section in GetSectionsFromOrderfile(filename):
        for symbol in _SectionNameToSymbols(RemoveSuffixes(section),
                                            section_to_symbols_map):
            yield symbol


def _SymbolsWithSameOffset(profiled_symbol, name_to_symbol_info,
                           offset_to_symbol_info):
    """Expands a symbol to include all symbols with the same offset.

    Args:
      profiled_symbol: the string symbol name to be expanded.
      name_to_symbol_info: {name: [symbol_info1], ...}, as returned by
          _GroupSymbolInfosFromBinary
      offset_to_symbol_info: {offset: [symbol_info1, ...], ...}

    Returns:
      A list of symbol names, or an empty list if profiled_symbol was not in
      name_to_symbol_info.
    """
    expanded = []
    for symbol_info in name_to_symbol_info.get(profiled_symbol, ()):
        expanded.extend(
            s.name for s in offset_to_symbol_info[symbol_info.offset])
    return expanded


@_UniqueGenerator
def _SectionMatchingRules(section_name, name_to_symbol_infos,
                          offset_to_symbol_infos, section_to_symbols_map,
                          symbol_to_sections_map, suffixed_sections):
    """Gets the set of section matching rules for section_name.

    These rules will include section_name, but also any sections which may
    contain the same code due to cloning, splitting, or identical code folding.

    Args:
      section_name: The section to expand.
      name_to_symbol_infos: {name: [symbol_info1], ...}, as returned by
          _GroupSymbolInfosFromBinary.
      offset_to_symbol_infos: {offset: [symbol_info1, ...], ...}
      section_to_symbols_map: The mapping from section to symbol name. Missing
          section names are treated as per _SectionNameToSymbols.
      symbol_to_sections_map: The mapping from symbol name to names of linker
          sections containing the symbol. Names built from _PREFIXES and the
          symbol name are emitted as well.
      suffixed_sections: A set of sections which can have suffixes.

    Yields:
      Section names including at least section_name. Names do not repeat.
    """
    for name in _ExpandSection(section_name, name_to_symbol_infos,
                               offset_to_symbol_infos, section_to_symbols_map,
                               symbol_to_sections_map):
        yield name
        # Since only a subset of methods (mostly those compiled with O2) ever
        # get suffixes, don't emit the wildcards for ones where it won't be
        # helpful. Otherwise linking takes too long.
        if name in suffixed_sections:
            # TODO(lizeb,pasko): instead of just appending .*, append
            # .suffix.* for _SUFFIXES. We can't do this right now because that
            # many wildcards seems to kill the linker (linking libchrome takes
            # 3 hours). This gets almost all the benefit at a much lower
            # link-time cost, but could cause problems with unexpected
            # suffixes.
            yield name + '.*'


def _ExpandSection(section_name, name_to_symbol_infos, offset_to_symbol_infos,
                   section_to_symbols_map, symbol_to_sections_map):
    """Yields the set of section names for section_name.

    This set will include section_name, but also any sections which may contain
    the same code due to identical code folding.

    Args:
      section_name: The section to expand.
      name_to_symbol_infos: {name: [symbol_info1], ...}, as returned by
          _GroupSymbolInfosFromBinary.
      offset_to_symbol_infos: {offset: [symbol_info1, ...], ...}
      section_to_symbols_map: The mapping from section to symbol name. Missing
          section names are treated as per _SectionNameToSymbols.
      symbol_to_sections_map: The mapping from symbol name to names of linker
          sections containing the symbol. Sections listed here are yielded
          first; the names generated from _PREFIXES and the symbol name are
          then yielded for every symbol, whether or not it is in the mapping.

    Yields:
      Section names including at least section_name. Names may repeat.
    """
    yield section_name
    for first_sym in _SectionNameToSymbols(section_name,
                                           section_to_symbols_map):
        for symbol in _SymbolsWithSameOffset(first_sym, name_to_symbol_infos,
                                             offset_to_symbol_infos):
            if symbol in symbol_to_sections_map:
                for section in symbol_to_sections_map[symbol]:
                    yield section
            for prefix in _PREFIXES:
                yield prefix + symbol


@_UniqueGenerator
def _ExpandSections(section_names, name_to_symbol_infos,
                    offset_to_symbol_infos, section_to_symbols_map,
                    symbol_to_sections_map, suffixed_sections):
    """Gets an ordered set of section matching rules for a list of sections.

    Rules will not be repeated.

    Args:
      section_names: The sections to expand.
      name_to_symbol_infos: {name: [symbol_info1], ...}, as returned by
          _GroupSymbolInfosFromBinary.
      offset_to_symbol_infos: {offset: [symbol_info1, ...], ...}
      section_to_symbols_map: The mapping from section to symbol names.
      symbol_to_sections_map: The mapping from symbol name to names of linker
          sections containing the symbol.
      suffixed_sections: A set of sections which can have suffixes.

    Yields:
      Section matching rules including at least section_names.
    """
    for profiled_section in section_names:
        for section in _SectionMatchingRules(
                profiled_section, name_to_symbol_infos, offset_to_symbol_infos,
                section_to_symbols_map, symbol_to_sections_map,
                suffixed_sections):
            yield section


def _CombineSectionListsByPrimaryName(symbol_to_sections_map):
    """Combines values of the symbol_to_sections_map by stripping suffixes.

    Example:
      {foo: [.text.foo, .text.bar.part.1],
       foo.constprop.4: [.text.baz.constprop.3]} ->
      {foo: [.text.foo, .text.bar, .text.baz]}

    Args:
      symbol_to_sections_map: Mapping from symbol name to list of section names

    Returns:
      A new mapping with symbol and section names suffix-stripped. Section
      lists of symbols that collapse to the same name are concatenated.
    """
    simplified = {}
    # .items() (rather than the Python 2-only .iteritems()) runs on both
    # Python 2 and Python 3.
    for suffixed_symbol, suffixed_sections in symbol_to_sections_map.items():
        symbol = RemoveSuffixes(suffixed_symbol)
        sections = [RemoveSuffixes(section) for section in suffixed_sections]
        simplified.setdefault(symbol, []).extend(sections)
    return simplified


def _SectionsWithSuffixes(symbol_to_sections_map):
    """Finds sections which have suffixes applied.

    Args:
      symbol_to_sections_map: a map where the values are lists of section
          names.

    Returns:
      A set containing all section names (suffix-stripped) which were seen
      with suffixes applied.
    """
    sections_with_suffixes = set()
    # .values() (rather than the Python 2-only .itervalues()) runs on both
    # Python 2 and Python 3.
    for suffixed_sections in symbol_to_sections_map.values():
        for suffixed_section in suffixed_sections:
            section = RemoveSuffixes(suffixed_section)
            if section != suffixed_section:
                sections_with_suffixes.add(section)
    return sections_with_suffixes


def _StripSuffixes(section_list):
    """Remove all suffixes on items in a list of sections or symbols."""
    return [RemoveSuffixes(section) for section in section_list]


def GeneratePatchedOrderfile(unpatched_orderfile, native_lib_filename,
                             output_filename):
    """Writes a patched orderfile.

    Args:
      unpatched_orderfile: (str) Path to the unpatched orderfile.
      native_lib_filename: (str) Path to the native library.
      output_filename: (str) Path to the patched orderfile.
    """
    (offset_to_symbol_infos, name_to_symbol_infos) = (
        _GroupSymbolInfosFromBinary(native_lib_filename))
    obj_dir = cygprofile_utils.GetObjDir(native_lib_filename)
    raw_symbol_map = cyglog_to_orderfile.ObjectFileProcessor(
        obj_dir).GetSymbolToSectionsMap()
    # Must be computed from the raw map: the combined map below has already
    # lost the suffixes.
    suffixed = _SectionsWithSuffixes(raw_symbol_map)
    symbol_to_sections_map = _CombineSectionListsByPrimaryName(raw_symbol_map)
    section_to_symbols_map = cygprofile_utils.InvertMapping(
        symbol_to_sections_map)
    profiled_sections = _StripSuffixes(
        GetSectionsFromOrderfile(unpatched_orderfile))
    expanded_sections = _ExpandSections(
        profiled_sections, name_to_symbol_infos, offset_to_symbol_infos,
        section_to_symbols_map, symbol_to_sections_map, suffixed)

    with open(output_filename, 'w') as f:
        # Make sure the anchor functions are located in the right place, here
        # and after everything else.
        # See the comment in //base/android/library_loader/anchor_functions.cc.
        for prefix in _PREFIXES:
            f.write(prefix + 'dummy_function_to_anchor_text\n')
        for prefix in _PREFIXES:
            f.write(prefix + 'dummy_function_start_of_ordered_text\n')

        for section in expanded_sections:
            f.write(section + '\n')

        for prefix in _PREFIXES:
            f.write(prefix + 'dummy_function_end_of_ordered_text\n')

        # The following is needed otherwise Gold only applies a partial sort.
        f.write('.text\n')  # gets methods not in a section, such as assembly
        f.write('.text.*\n')  # gets everything else

        # Since wildcards are not supported by lld, the "end of text" anchor
        # symbol is not emitted, a different mechanism is used instead. See
        # comments in the file above.
        for prefix in _PREFIXES:
            if prefix:
                f.write(prefix + 'dummy_function_at_the_end_of_text\n')


def _CreateArgumentParser():
    """Creates and returns the argument parser."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--target-arch', action='store',
                        choices=['arm', 'arm64', 'x86', 'x86_64', 'x64',
                                 'mips'],
                        help='The target architecture for the library.')
    parser.add_argument('--unpatched-orderfile', required=True,
                        help='Path to the unpatched orderfile')
    parser.add_argument('--native-library', required=True,
                        help='Path to the native library')
    parser.add_argument('--output-file', required=True, help='Output filename')
    return parser


def main():
    """Parses the command line and writes the patched orderfile.

    Returns:
      0, used as the process exit code.
    """
    parser = _CreateArgumentParser()
    options = parser.parse_args()
    if not options.target_arch:
        # Store the detected value on the attribute that is actually read
        # below; assigning it to options.arch left target_arch as None.
        options.target_arch = cygprofile_utils.DetectArchitecture()
    symbol_extractor.SetArchitecture(options.target_arch)
    GeneratePatchedOrderfile(options.unpatched_orderfile,
                             options.native_library, options.output_file)
    return 0


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    sys.exit(main())