#-------------------------------------------------------------------------------
# elftools: dwarf/structs.py
#
# Encapsulation of Construct structs for parsing DWARF, adjusted for correct
# endianness and word-size.
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from ..construct import (
    UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
    SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
    Adapter, Struct, ConstructError, If, RepeatUntil, Field, Rename, Enum,
    Array, PrefixedArray, CString, Embed,
    )
from ..common.construct_utils import RepeatUntilExcluding

from .enums import *


class DWARFStructs(object):
    """ Collection of Construct structs for parsing DWARF sections, built
        for one combination of byte order, DWARF format and address size.
        Each compile unit in DWARF info can have its own structs object.

        Most attributes are struct *factories*: call them with a name to
        obtain a usable Construct struct (like other Construct structs).
        Attributes marked with (+) are ready-made and are used without
        being given a name.

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec v3):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding (spec section 7.4)

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - the contents only, without
                the initial code.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                dummy names.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+):
                A call-frame CIE

            Dwarf_FDE_header (+):
                A call-frame FDE

        See also the documentation of public methods.
    """
    def __init__(self, little_endian, dwarf_format, address_size):
        """ little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8). (See spec
                section 7.5.1)
        """
        assert dwarf_format in (32, 64)
        assert address_size in (8, 4)
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self._create_structs()

    def initial_length_field_size(self):
        """ Number of bytes occupied by an initial length field: 4 in the
            32-bit DWARF format, 12 otherwise.
        """
        if self.dwarf_format == 32:
            return 4
        return 12

    def _create_structs(self):
        # Select the fixed-size integer constructs for the requested byte
        # order once; everything else is derived from them.
        if self.little_endian:
            uint8, uint16, uint32, uint64 = ULInt8, ULInt16, ULInt32, ULInt64
            int8, int16, int32, int64 = SLInt8, SLInt16, SLInt32, SLInt64
        else:
            uint8, uint16, uint32, uint64 = UBInt8, UBInt16, UBInt32, UBInt64
            int8, int16, int32, int64 = SBInt8, SBInt16, SBInt32, SBInt64

        self.Dwarf_uint8 = uint8
        self.Dwarf_uint16 = uint16
        self.Dwarf_uint32 = uint32
        self.Dwarf_uint64 = uint64
        self.Dwarf_int8 = int8
        self.Dwarf_int16 = int16
        self.Dwarf_int32 = int32
        self.Dwarf_int64 = int64

        # Offsets follow the DWARF format, target addresses follow the
        # machine address size.
        self.Dwarf_offset = uint32 if self.dwarf_format == 32 else uint64
        self.Dwarf_target_addr = uint32 if self.address_size == 4 else uint64

        # The order matters: later builders use attributes set by earlier
        # ones (e.g. headers need Dwarf_initial_length and the LEB128s).
        builders = (
            self._create_initial_length,
            self._create_leb128,
            self._create_cu_header,
            self._create_abbrev_declaration,
            self._create_dw_form,
            self._create_lineprog_header,
            self._create_callframe_entry_headers,
        )
        for build in builders:
            build()

    def _create_initial_length(self):
        def _has_extension_word(ctx):
            # 0xFFFFFFFF in the first word is the continuation value that
            # announces a following 64-bit word.
            return ctx.first == 0xFFFFFFFF

        def _initial_length(name):
            # Parses the first 32-bit word and, only when it holds the
            # continuation value, the second (64-bit) word as well. The
            # adapter then reduces the pair to a single length value.
            raw_field = Struct(name,
                self.Dwarf_uint32('first'),
                If(_has_extension_word,
                    self.Dwarf_uint64('second'),
                    elsevalue=None))
            return _InitialLengthAdapter(raw_field)

        self.Dwarf_initial_length = _initial_length

    def _create_leb128(self):
        self.Dwarf_uleb128, self.Dwarf_sleb128 = _ULEB128, _SLEB128

    def _create_cu_header(self):
        fields = [
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_abbrev_offset'),
            self.Dwarf_uint8('address_size'),
        ]
        self.Dwarf_CU_header = Struct('Dwarf_CU_header', *fields)

    def _create_abbrev_declaration(self):
        def _is_terminator(spec, ctx):
            # The attribute list ends with a (DW_AT_null, DW_FORM_null) pair,
            # which is not included in the parsed result.
            return spec.name == 'DW_AT_null' and spec.form == 'DW_FORM_null'

        attr_spec = Struct('attr_spec',
            Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
            Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM))

        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(_is_terminator, attr_spec))

    def _create_dw_form(self):
        uint8 = self.Dwarf_uint8
        uint16 = self.Dwarf_uint16
        uint32 = self.Dwarf_uint32
        uint64 = self.Dwarf_uint64
        uleb128 = self.Dwarf_uleb128
        offset = self.Dwarf_offset
        block = self._make_block_struct

        self.Dwarf_dw_form = {
            'DW_FORM_addr': self.Dwarf_target_addr(''),

            'DW_FORM_block1': block(uint8),
            'DW_FORM_block2': block(uint16),
            'DW_FORM_block4': block(uint32),
            'DW_FORM_block': block(uleb128),

            # Every DW_FORM_data* form is treated as unsigned here
            'DW_FORM_data1': uint8(''),
            'DW_FORM_data2': uint16(''),
            'DW_FORM_data4': uint32(''),
            'DW_FORM_data8': uint64(''),
            'DW_FORM_sdata': self.Dwarf_sleb128(''),
            'DW_FORM_udata': uleb128(''),

            'DW_FORM_string': CString(''),
            'DW_FORM_strp': offset(''),
            'DW_FORM_flag': uint8(''),

            'DW_FORM_ref1': uint8(''),
            'DW_FORM_ref2': uint16(''),
            'DW_FORM_ref4': uint32(''),
            'DW_FORM_ref8': uint64(''),
            'DW_FORM_ref_udata': uleb128(''),
            'DW_FORM_ref_addr': offset(''),

            'DW_FORM_indirect': uleb128(''),
        }

    def _create_lineprog_header(self):
        # An entry with an empty name terminates the file table, so the
        # remaining fields are parsed only when the name is non-empty.
        file_details = Struct('',
            self.Dwarf_uleb128('dir_index'),
            self.Dwarf_uleb128('mtime'),
            self.Dwarf_uleb128('length'))
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) > 0, Embed(file_details)))

        include_directories = RepeatUntilExcluding(
            lambda obj, ctx: obj == b'',
            CString('include_directory'))
        file_entries = RepeatUntilExcluding(
            lambda obj, ctx: len(obj.name) == 0,
            self.Dwarf_lineprog_file_entry)

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            # One length byte per standard opcode: opcode_base - 1 entries
            Array(lambda ctx: ctx['opcode_base'] - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            include_directories,
            file_entries,
            )

    def _create_callframe_entry_headers(self):
        cie_fields = [
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'),
        ]
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', *cie_fields)

        fde_fields = [
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'),
        ]
        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header', *fde_fields)

    def _make_block_struct(self, length_field):
        """ Build the struct for a DW_FORM_block* form: an array of bytes
            prefixed by its length, where the length is parsed by the given
            length_field factory.
        """
        element = self.Dwarf_uint8('elem')
        prefix = length_field('')
        return PrefixedArray(subcon=element, length_field=prefix)


class _InitialLengthAdapter(Adapter):
    """ A standard Construct adapter over a sub-construct that yields an
        object with the attributes 'first' and 'second'; it reduces them to
        the single initial length value.
    """
    def _decode(self, obj, context):
        first = obj.first
        if first < 0xFFFFFF00:
            # Plain 32-bit length
            return first
        if first == 0xFFFFFFFF:
            # Continuation value: the length is held in the second word
            return obj.second
        raise ConstructError("Failed decoding initial length for %X" % first)


def _LEB128_reader():
    """ Read LEB128 variable-length data from the stream, one byte at a
        time. The data is terminated by a byte with 0 in its highest bit.
    """
    def _is_last_byte(obj, ctx):
        return (ord(obj) & 0x80) == 0

    return RepeatUntil(_is_last_byte, Field(None, 1))


def _leb128_payload(chunks):
    """ Combine the low 7 bits of each one-byte chunk into an integer; the
        first chunk supplies the least significant bits.
    """
    return sum((ord(chunk) & 0x7F) << (7 * position)
               for position, chunk in enumerate(chunks))


class _ULEB128Adapter(Adapter):
    """ An adapter for ULEB128, given a sequence of bytes in a sub-construct.
    """
    def _decode(self, obj, context):
        return _leb128_payload(obj)


class _SLEB128Adapter(Adapter):
    """ An adapter for SLEB128, given a sequence of bytes in a sub-construct.
    """
    def _decode(self, obj, context):
        value = _leb128_payload(obj)
        if ord(obj[-1]) & 0x40:
            # Bit 6 of the final byte is the sign bit: sign extend by
            # subtracting 2**(number of payload bits).
            value -= 1 << (7 * len(obj))
        return value


def _ULEB128(name):
    """ A construct creator for ULEB128 encoding.
    """
    decoder = _ULEB128Adapter(_LEB128_reader())
    return Rename(name, decoder)


def _SLEB128(name):
    """ A construct creator for SLEB128 encoding.
    """
    decoder = _SLEB128Adapter(_LEB128_reader())
    return Rename(name, decoder)