#!/usr/bin/env python
"""
Helper script to print out the raw content of an ELF section.

Example usages:
```
# print out as bits by default
extract-section.py .text --input-file=foo.o
```
```
# read from stdin and print out in hex
cat foo.o | extract-section.py -h .text
```

This is merely a wrapper around `llvm-readobj` that focuses on the binary
content as well as providing more formatting options.
"""
from __future__ import print_function

import argparse
import subprocess
import sys


# Unfortunately reading binary from stdin is not so trivial in Python...
def read_raw_stdin():
    """Read everything available on stdin and return it unmodified.

    On Python 3 the underlying binary buffer is used. On Python 2 `sys.stdin`
    is read directly; on Windows it is first switched to binary mode so the
    content is not treated as text.
    """
    if sys.version_info >= (3, 0):
        return sys.stdin.buffer.read()

    # Windows will always read as string so we need some special handling.
    if sys.platform == "win32":
        import msvcrt
        import os

        msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
    return sys.stdin.read()


def get_raw_section_dump(readobj_path, section_name, input_file):
    """Run `readobj_path` and return its GNU-style hex dump of a section.

    Args:
        readobj_path: Executable to invoke (expected to be llvm-readobj).
        section_name: Name of the section passed to `--hex-dump=`.
        input_file: Path of the object file, or "-" to forward this
            process's stdin to the tool.

    Returns:
        The tool's standard output as text (decoded as UTF-8 when the
        subprocess returned bytes).
    """
    cmd = [
        readobj_path,
        "--elf-output-style=GNU",
        "--hex-dump={}".format(section_name),
        input_file,
    ]
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    # Only consume our own stdin when the tool was asked to read from it.
    stdin_data = read_raw_stdin() if input_file == "-" else None
    out, _ = proc.communicate(input=stdin_data)

    return out if isinstance(out, str) else out.decode("utf-8")


def iter_dump_words(raw_dump):
    """Yield the data words of a hex dump as integers.

    Lines starting with "Hex dump" are skipped. On every other line the first
    space-separated field is dropped and up to four following fields are
    parsed as hexadecimal; parsing of a line stops at the first field that is
    not valid hex (e.g. padding or the printable-character column).

    NOTE(review): a trailing group shorter than 8 hex digits is still yielded
    and later formatted as a full 32-bit word.
    """
    for line in raw_dump.splitlines(False):
        if line.startswith("Hex dump"):
            continue
        for field in line.strip().split(" ")[1:5]:
            try:
                word = int(field, 16)
            except ValueError:
                # exclude any non-hex dump string
                break
            yield word


def word_to_bits(val, endian="big", byte_indicator=False):
    """Return the bits of a 32-bit word as a list of "0"/"1" strings.

    Args:
        val: The word to format.
        endian: "little" emits the least-significant byte first; any other
            value emits the most-significant byte first. Bits inside a byte
            are always emitted most-significant first.
        byte_indicator: When true, a "." element follows every 8 bits.
    """
    offsets = (0, 8, 16, 24) if endian == "little" else (24, 16, 8, 0)
    tokens = []
    for off in offsets:
        byte = (val >> off) & 0xFF
        tokens.extend(str((byte >> bit) & 1) for bit in range(7, -1, -1))
        if byte_indicator:
            tokens.append(".")
    return tokens


def word_to_hex(val, hex_width=4):
    """Split a 32-bit word into zero-padded hex strings of `hex_width` bytes.

    Elements are produced from the most-significant end of the word.

    NOTE(review): a width of 3 yields a single element covering the upper
    three bytes only; the low byte of the word is not emitted.

    Raises:
        ValueError: If `hex_width` is not between 1 and 4.
    """
    if not 1 <= hex_width <= 4:
        raise ValueError("hex_width must be between 1 and 4")
    width_bits = hex_width * 8
    mask = (1 << width_bits) - 1
    fmt = "{:0" + str(hex_width * 2) + "x}"
    return [
        fmt.format((val >> off) & mask)
        for off in range(32 - width_bits, -1, -width_bits)
    ]


def build_arg_parser():
    """Create the command-line parser for this script."""
    # The default '-h' (--help) will conflict with our '-h' (hex) format
    arg_parser = argparse.ArgumentParser(add_help=False)
    arg_parser.add_argument(
        "--readobj-path",
        dest="tool_path",  # must match the name read back in main()
        metavar="<executable path>",
        type=str,
        help="Path to llvm-readobj",
    )
    arg_parser.add_argument(
        "--input-file",
        metavar="<file>",
        type=str,
        help="Input object file, or '-' to read from stdin",
    )
    arg_parser.add_argument(
        "section",
        metavar="<section name>",
        type=str,
        help="Name of the section to extract",
    )

    # Output format
    format_group = arg_parser.add_mutually_exclusive_group()
    format_group.add_argument(
        "-b",
        dest="format",
        action="store_const",
        const="bits",
        help="Print out in bits",
    )
    arg_parser.add_argument(
        "--byte-indicator",
        action="store_true",
        help="Whether to print a '.' every 8 bits in bits printing mode",
    )
    arg_parser.add_argument(
        "--bits-endian",
        metavar="<little/big>",
        type=str,
        choices=["little", "big"],
        help="Print out bits in specified endianness (little or big); defaults to big",
    )

    format_group.add_argument(
        "-h",
        dest="format",
        action="store_const",
        const="hex",
        help="Print out in hexadecimal",
    )
    arg_parser.add_argument(
        "--hex-width",
        metavar="<# of bytes>",
        type=int,
        help="The width (in byte) of every element in hex printing mode",
    )

    arg_parser.add_argument("--help", action="help")
    arg_parser.set_defaults(
        format="bits",
        tool_path="llvm-readobj",
        input_file="-",
        byte_indicator=False,
        hex_width=4,
        bits_endian="big",
    )
    return arg_parser


def main():
    """Parse the command line, dump the section and print it formatted."""
    arg_parser = build_arg_parser()
    args = arg_parser.parse_args()

    # Reject a bad width up front instead of silently printing nothing.
    if args.format == "hex" and not 1 <= args.hex_width <= 4:
        arg_parser.error("--hex-width must be between 1 and 4")

    raw_section = get_raw_section_dump(args.tool_path, args.section, args.input_file)

    results = []
    for val in iter_dump_words(raw_section):
        if args.format == "bits":
            results.extend(word_to_bits(val, args.bits_endian, args.byte_indicator))
        elif args.format == "hex":
            results.extend(word_to_hex(val, args.hex_width))
    print(" ".join(results), end="")


if __name__ == "__main__":
    main()