#-------------------------------------------------------------------------------
# elftools: dwarf/callframe.py
#
# DWARF call frame information
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
import copy
from collections import namedtuple
from ..common.utils import (struct_parse, dwarf_assert, preserve_stream_pos)
from ..common.py3compat import iterkeys
from .structs import DWARFStructs
from .constants import * 


class CallFrameInfo(object):
    """ DWARF CFI (Call Frame Info)

        stream, size:
            A stream holding the .debug_frame section, and the size of the
            section in it.

        base_structs:
            The structs to be used as the base for parsing this section.
            Eventually, each entry gets its own structs based on the initial
            length field it starts with. The address_size, however, is taken
            from base_structs. This appears to be a limitation of the DWARFv3
            standard, fixed in v4 (where an address_size field exists for each
            CFI). I had a discussion about this on dwarf-discuss that confirms
            this.
            Currently for base_structs I simply use the elfclass of the
            containing file, but more sophisticated methods are used by
            libdwarf and others, such as guessing which CU contains which FDEs
            (based on their address ranges) and taking the address_size from
            those CUs.
    """
    def __init__(self, stream, size, base_structs):
        self.stream = stream
        self.size = size
        self.base_structs = base_structs
        # Lazily filled by get_entries()
        self.entries = None

        # Map between an offset in the stream and the entry object found at this
        # offset. Useful for assigning CIE to FDEs according to the CIE_pointer
        # header field which contains a stream offset.
        self._entry_cache = {}

    def get_entries(self):
        """ Get a list of entries that constitute this CFI. The list consists
            of CIE or FDE objects, in the order of their appearance in the
            section. The section is parsed on the first call only; later
            calls return the same list.
        """
        if self.entries is None:
            self.entries = self._parse_entries()
        return self.entries

    #-------------------------

    def _parse_entries(self):
        """ Walk the section from offset 0 up to self.size, parsing one entry
            after another. Return the list of parsed entries.
        """
        entries = []
        offset = 0
        while offset < self.size:
            entries.append(self._parse_entry_at(offset))
            # _parse_entry_at leaves the stream right after the entry it
            # returned, so the stream position is the next entry's offset.
            offset = self.stream.tell()
        return entries

    def _parse_entry_at(self, offset):
        """ Parse an entry from self.stream starting with the given offset.
            Return the entry object (a CIE or a FDE). self.stream will point
            right after the entry, whether the entry was freshly parsed or
            served from the cache.
        """
        cached = self._entry_cache.get(offset)
        if cached is not None:
            # The entry was already parsed (for example as the CIE of an
            # earlier FDE). Still move the stream past it: callers such as
            # _parse_entries rely on the stream position to find the next
            # entry and would otherwise never advance.
            self.stream.seek(
                offset + cached.header.length +
                cached.structs.initial_length_field_size())
            return cached

        # The first 32 bits tell whether this entry uses the 32-bit or the
        # 64-bit DWARF format (0xFFFFFFFF is the 64-bit escape value).
        entry_length = struct_parse(
            self.base_structs.Dwarf_uint32(''), self.stream, offset)
        dwarf_format = 64 if entry_length == 0xFFFFFFFF else 32

        entry_structs = DWARFStructs(
            little_endian=self.base_structs.little_endian,
            dwarf_format=dwarf_format,
            address_size=self.base_structs.address_size)

        # Read the next field to see whether this is a CIE or FDE. It sits
        # right after the *whole* initial length field, which in the 64-bit
        # format is longer than the 4 bytes consumed above, so the position
        # is given explicitly instead of relying on the stream position.
        CIE_id = struct_parse(
            entry_structs.Dwarf_offset(''), self.stream,
            offset + entry_structs.initial_length_field_size())

        is_CIE = (
            (dwarf_format == 32 and CIE_id == 0xFFFFFFFF) or
            CIE_id == 0xFFFFFFFFFFFFFFFF)

        if is_CIE:
            header_struct = entry_structs.Dwarf_CIE_header
        else:
            header_struct = entry_structs.Dwarf_FDE_header

        # Parse the header, which goes up to and including the
        # return_address_register field
        header = struct_parse(
            header_struct, self.stream, offset)

        # For convenience, compute the end offset for this entry
        end_offset = (
            offset + header.length +
            entry_structs.initial_length_field_size())

        # At this point self.stream is at the start of the instruction list
        # for this entry
        instructions = self._parse_instructions(
            entry_structs, self.stream.tell(), end_offset)

        if is_CIE:
            self._entry_cache[offset] = CIE(
                header=header, instructions=instructions, offset=offset,
                structs=entry_structs)
        else: # FDE
            # Resolving the CIE may parse another part of the section; the
            # stream position is restored afterwards.
            with preserve_stream_pos(self.stream):
                cie = self._parse_entry_at(header['CIE_pointer'])
            self._entry_cache[offset] = FDE(
                header=header, instructions=instructions, offset=offset,
                structs=entry_structs, cie=cie)

        # Leave the stream exactly at the end of the entry, the same place
        # the cached path above leaves it.
        self.stream.seek(end_offset)
        return self._entry_cache[offset]

    def _parse_instructions(self, structs, offset, end_offset):
        """ Parse a list of CFI instructions from self.stream, starting with
            the offset and until (not including) end_offset.
            Return a list of CallFrameInstruction objects.
            An opcode that is not recognized is reported through dwarf_assert.
        """
        instructions = []
        while offset < end_offset:
            opcode = struct_parse(structs.Dwarf_uint8(''), self.stream, offset)
            args = []

            # The high 2 bits select a "primary" instruction, whose operand
            # (if any) is embedded in the low 6 bits of the opcode byte.
            primary = opcode & _PRIMARY_MASK
            primary_arg = opcode & _PRIMARY_ARG_MASK
            if primary == DW_CFA_advance_loc:
                args = [primary_arg]
            elif primary == DW_CFA_offset:
                args = [
                    primary_arg,
                    struct_parse(structs.Dwarf_uleb128(''), self.stream)]
            elif primary == DW_CFA_restore:
                args = [primary_arg]
            # primary == 0 and real opcode is extended
            elif opcode in (DW_CFA_nop, DW_CFA_remember_state,
                            DW_CFA_restore_state):
                args = []
            elif opcode == DW_CFA_set_loc:
                args = [
                    struct_parse(structs.Dwarf_target_addr(''), self.stream)]
            elif opcode == DW_CFA_advance_loc1:
                args = [struct_parse(structs.Dwarf_uint8(''), self.stream)]
            elif opcode == DW_CFA_advance_loc2:
                args = [struct_parse(structs.Dwarf_uint16(''), self.stream)]
            elif opcode == DW_CFA_advance_loc4:
                args = [struct_parse(structs.Dwarf_uint32(''), self.stream)]
            elif opcode in (DW_CFA_offset_extended, DW_CFA_register,
                            DW_CFA_def_cfa, DW_CFA_val_offset):
                # Two ULEB128 operands
                args = [
                    struct_parse(structs.Dwarf_uleb128(''), self.stream),
                    struct_parse(structs.Dwarf_uleb128(''), self.stream)]
            elif opcode in (DW_CFA_restore_extended, DW_CFA_undefined,
                            DW_CFA_same_value, DW_CFA_def_cfa_register,
                            DW_CFA_def_cfa_offset):
                # One ULEB128 operand
                args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)]
            elif opcode == DW_CFA_def_cfa_offset_sf:
                # One SLEB128 operand
                args = [struct_parse(structs.Dwarf_sleb128(''), self.stream)]
            elif opcode == DW_CFA_def_cfa_expression:
                # One block operand
                args = [struct_parse(
                    structs.Dwarf_dw_form['DW_FORM_block'], self.stream)]
            elif opcode in (DW_CFA_expression, DW_CFA_val_expression):
                # ULEB128 operand followed by a block operand
                args = [
                    struct_parse(structs.Dwarf_uleb128(''), self.stream),
                    struct_parse(
                        structs.Dwarf_dw_form['DW_FORM_block'], self.stream)]
            elif opcode in (DW_CFA_offset_extended_sf,
                            DW_CFA_def_cfa_sf, DW_CFA_val_offset_sf):
                # ULEB128 operand followed by a SLEB128 operand
                args = [
                    struct_parse(structs.Dwarf_uleb128(''), self.stream),
                    struct_parse(structs.Dwarf_sleb128(''), self.stream)]
            else:
                dwarf_assert(False, 'Unknown CFI opcode: 0x%x' % opcode)

            instructions.append(CallFrameInstruction(opcode=opcode, args=args))
            # The next instruction starts where the operands of this one end
            offset = self.stream.tell()
        return instructions


def instruction_name(opcode):
    """ Return the name of the CFI instruction encoded by the given opcode.

        When the bits selected by _PRIMARY_MASK are non-zero, only those bits
        identify the instruction (the remaining bits are not part of the
        lookup key). Otherwise the whole opcode value is looked up.
    """
    high_bits = opcode & _PRIMARY_MASK
    lookup_key = high_bits if high_bits != 0 else opcode
    return _OPCODE_NAME_MAP[lookup_key]


class CallFrameInstruction(object):
    """ A single instruction found in the CFI section.

        opcode:
            The numeric opcode of the instruction, as it appears in the
            section.

        args:
            The list of arguments decoded from the stream. Arguments that
            are embedded in the low bits of some opcodes are included here
            as well, when applicable.
    """
    def __init__(self, opcode, args):
        self.opcode, self.args = opcode, args

    def __repr__(self):
        # Shown as: <instruction name> (<opcode in hex>): <argument list>
        mnemonic = instruction_name(self.opcode)
        return '%s (0x%x): %s' % (mnemonic, self.opcode, self.args)


class CFIEntry(object):
    """ A common base class for CFI entries.
        Contains a header and a list of instructions (CallFrameInstruction).
        offset: the offset of this entry from the beginning of the section
        cie: for FDEs, a CIE pointer is required
    """
    def __init__(self, header, structs, instructions, offset, cie=None):
        self.header = header
        self.structs = structs
        self.instructions = instructions
        self.offset = offset
        self.cie = cie
        # Cache for get_decoded()
        self._decoded_table = None

    def get_decoded(self):
        """ Decode the CFI contained in this entry and return a
            DecodedCallFrameTable object representing it. See the documentation
            of that class to understand how to interpret the decoded table.
            The table is computed on the first call and cached.
        """
        if self._decoded_table is None:
            self._decoded_table = self._decode_CFI_table()
        return self._decoded_table

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def _decode_CFI_table(self):
        """ Decode the instructions contained in the given CFI entry and return
            a DecodedCallFrameTable.
        """
        if isinstance(self, CIE):
            # For a CIE, initialize cur_line to an "empty" line
            cie = self
            last_line_in_CIE = None
            cur_line = dict(pc=0, cfa=None)
            reg_order = []
        else: # FDE
            # For a FDE, we need to decode the attached CIE first, because its
            # decoded table is needed. Its "initial instructions" describe a
            # line that serves as the base (first) line in the FDE's table.
            cie = self.cie
            cie_decoded_table = cie.get_decoded()
            last_line_in_CIE = copy.copy(cie_decoded_table.table[-1])
            # cur_line must be a separate dict: last_line_in_CIE has to keep
            # the initial rules untouched, since DW_CFA_restore reads them
            # back after cur_line has been modified.
            cur_line = copy.copy(last_line_in_CIE)
            cur_line['pc'] = self['initial_location']
            reg_order = copy.copy(cie_decoded_table.reg_order)

        table = []

        # Keeps a stack for the use of DW_CFA_{remember|restore}_state
        # instructions.
        line_stack = []

        def _add_to_order(regnum):
            # Record a register the first time a rule is set for it
            if regnum not in cur_line:
                reg_order.append(regnum)

        for instr in self.instructions:
            # Throughout this loop, cur_line is the current line. Some
            # instructions add it to the table, but most instructions just
            # update it without adding it to the table.

            name = instruction_name(instr.opcode)

            if name == 'DW_CFA_set_loc':
                table.append(copy.copy(cur_line))
                cur_line['pc'] = instr.args[0]
            elif name in (  'DW_CFA_advance_loc1', 'DW_CFA_advance_loc2',
                            'DW_CFA_advance_loc4', 'DW_CFA_advance_loc'):
                table.append(copy.copy(cur_line))
                cur_line['pc'] += instr.args[0] * cie['code_alignment_factor']
            elif name == 'DW_CFA_def_cfa':
                cur_line['cfa'] = CFARule(
                    reg=instr.args[0],
                    offset=instr.args[1])
            elif name == 'DW_CFA_def_cfa_sf':
                # The signed offset is factored by the *data* alignment
                # factor (the code alignment factor only scales location
                # advances).
                cur_line['cfa'] = CFARule(
                    reg=instr.args[0],
                    offset=instr.args[1] * cie['data_alignment_factor'])
            elif name == 'DW_CFA_def_cfa_register':
                cur_line['cfa'] = CFARule(
                    reg=instr.args[0],
                    offset=cur_line['cfa'].offset)
            elif name == 'DW_CFA_def_cfa_offset':
                cur_line['cfa'] = CFARule(
                    reg=cur_line['cfa'].reg,
                    offset=instr.args[0])
            elif name == 'DW_CFA_def_cfa_offset_sf':
                # Same as DW_CFA_def_cfa_offset, but the operand is signed
                # and factored by the data alignment factor.
                cur_line['cfa'] = CFARule(
                    reg=cur_line['cfa'].reg,
                    offset=instr.args[0] * cie['data_alignment_factor'])
            elif name == 'DW_CFA_def_cfa_expression':
                cur_line['cfa'] = CFARule(expr=instr.args[0])
            elif name == 'DW_CFA_undefined':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(RegisterRule.UNDEFINED)
            elif name == 'DW_CFA_same_value':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(RegisterRule.SAME_VALUE)
            elif name in (  'DW_CFA_offset', 'DW_CFA_offset_extended',
                            'DW_CFA_offset_extended_sf'):
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.OFFSET,
                    instr.args[1] * cie['data_alignment_factor'])
            elif name in ('DW_CFA_val_offset', 'DW_CFA_val_offset_sf'):
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.VAL_OFFSET,
                    instr.args[1] * cie['data_alignment_factor'])
            elif name == 'DW_CFA_register':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.REGISTER,
                    instr.args[1])
            elif name == 'DW_CFA_expression':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.EXPRESSION,
                    instr.args[1])
            elif name == 'DW_CFA_val_expression':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.VAL_EXPRESSION,
                    instr.args[1])
            elif name in ('DW_CFA_restore', 'DW_CFA_restore_extended'):
                _add_to_order(instr.args[0])
                dwarf_assert(
                    isinstance(self, FDE),
                    '%s instruction must be in a FDE' % name)
                dwarf_assert(
                    instr.args[0] in last_line_in_CIE,
                    '%s: can not find register in CIE' % name)
                # Bring back the rule the register had after the CIE's
                # initial instructions.
                cur_line[instr.args[0]] = last_line_in_CIE[instr.args[0]]
            elif name == 'DW_CFA_remember_state':
                # Push a snapshot, not the live dict: cur_line keeps being
                # modified in place by the following instructions. A shallow
                # copy is enough because rule objects are replaced, never
                # mutated.
                line_stack.append(copy.copy(cur_line))
            elif name == 'DW_CFA_restore_state':
                # Only the rules are restored; the location of the current
                # line stays where it is.
                pc = cur_line['pc']
                cur_line = line_stack.pop()
                cur_line['pc'] = pc

        # The current line is appended to the table after all instructions
        # have ended, in any case (even if there were no instructions).
        table.append(cur_line)
        return DecodedCallFrameTable(table=table, reg_order=reg_order)


# A CIE and a FDE have exactly the same functionality, except that a FDE has
# a pointer to its CIE. The functionality is wholly encapsulated in CFIEntry,
# so the CIE and FDE classes exist separately only for identification (instead
# of having an explicit "entry_type" field in CFIEntry).
#
class CIE(CFIEntry):
    """ A CFI entry of the CIE kind. Adds nothing to CFIEntry; the class
        exists so that entries can be told apart with isinstance.
    """


class FDE(CFIEntry):
    """ A CFI entry of the FDE kind. Adds nothing to CFIEntry; the class
        exists so that entries can be told apart with isinstance.
    """


class RegisterRule(object):
    """ A rule describing how to find a register in a call frame.

        type:
            One of the rule kinds listed as class attributes below (an
            enumeration following DWARFv3 section 6.4.1).

        arg:
            Optional argument that augments the type; None when not given.
    """
    # The available rule kinds. Each one is represented by its own name.
    UNDEFINED = 'UNDEFINED'
    SAME_VALUE = 'SAME_VALUE'
    OFFSET = 'OFFSET'
    VAL_OFFSET = 'VAL_OFFSET'
    REGISTER = 'REGISTER'
    EXPRESSION = 'EXPRESSION'
    VAL_EXPRESSION = 'VAL_EXPRESSION'
    ARCHITECTURAL = 'ARCHITECTURAL'

    def __init__(self, type, arg=None):
        self.type, self.arg = type, arg

    def __repr__(self):
        shown = (self.type, self.arg)
        return 'RegisterRule(%s, %s)' % shown


class CFARule(object):
    """ The rule used to compute the CFA for a location. It is given either
        as a register together with an offset (reg, offset), or as a DWARF
        expression (expr). Parts that are not given are None.
    """
    def __init__(self, reg=None, offset=None, expr=None):
        self.reg, self.offset, self.expr = reg, offset, expr

    def __repr__(self):
        shown = (self.reg, self.offset, self.expr)
        return 'CFARule(reg=%s, offset=%s, expr=%s)' % shown


# The decoded CFI of an entry: essentially one large table, according to
# DWARFv3 section 6.4.1.
#
# DecodedCallFrameTable is a simple named tuple that groups the table with
# the order in which registers appear in it.
#
# table:
#
# A list of dicts, each representing one "line" of the decoded table. Two
# keys are special in every line: 'pc' holds the location/program counter
# (LOC), and 'cfa' holds the CFARule used to locate the CFA on that line.
# All other keys are register numbers, mapped to the RegisterRule that
# describes the rule for that register.
#
# reg_order:
#
# A list of the register numbers described in the table, in the order of
# their appearance.
#
DecodedCallFrameTable = namedtuple(
    'DecodedCallFrameTable', ['table', 'reg_order'])


#---------------- PRIVATE ----------------#

# The two high bits of an opcode byte are used to recognize the "primary"
# instructions; the six low bits carry the argument embedded in such opcodes.
_PRIMARY_MASK = 0xC0
_PRIMARY_ARG_MASK = 0x3F

# Maps the value of each DW_CFA_* constant to its name. The constants were
# imported from the constants module with `import *`, so they are found by
# scanning globals(). The keys are snapshotted into a list first, because the
# loop variable itself becomes a new global while the loop is running.
_OPCODE_NAME_MAP = {}
for name in list(iterkeys(globals())):
    if not name.startswith('DW_CFA'):
        continue
    _OPCODE_NAME_MAP[globals()[name]] = name