diff options
Diffstat (limited to 'extra/stack_analyzer/stack_analyzer.py')
-rwxr-xr-x | extra/stack_analyzer/stack_analyzer.py | 232 |
1 files changed, 194 insertions, 38 deletions
diff --git a/extra/stack_analyzer/stack_analyzer.py b/extra/stack_analyzer/stack_analyzer.py index 329e4faa6b..1c6276f399 100755 --- a/extra/stack_analyzer/stack_analyzer.py +++ b/extra/stack_analyzer/stack_analyzer.py @@ -246,6 +246,157 @@ class Function(object): return True +class AndesAnalyzer(object): + """Disassembly analyzer for Andes architecture. + + Public Methods: + AnalyzeFunction: Analyze stack frame and callsites of the function. + """ + + GENERAL_PURPOSE_REGISTER_SIZE = 4 + + # Possible condition code suffixes. + CONDITION_CODES = [ 'eq', 'eqz', 'gez', 'gtz', 'lez', 'ltz', 'ne', 'nez', + 'eqc', 'nec', 'nezs', 'nes', 'eqs'] + CONDITION_CODES_RE = '({})'.format('|'.join(CONDITION_CODES)) + + IMM_ADDRESS_RE = r'([0-9A-Fa-f]+)\s+<([^>]+)>' + # Branch instructions. + JUMP_OPCODE_RE = re.compile(r'^(b{0}|j|jr|jr.|jrnez)(\d?|\d\d)$' \ + .format(CONDITION_CODES_RE)) + # Call instructions. + CALL_OPCODE_RE = re.compile \ + (r'^(jal|jral|jral.|jralnez|beqzal|bltzal|bgezal)(\d)?$') + CALL_OPERAND_RE = re.compile(r'^{}$'.format(IMM_ADDRESS_RE)) + # Ignore lp register because it's for return. + INDIRECT_CALL_OPERAND_RE = re.compile \ + (r'^\$r\d{1,}$|\$fp$|\$gp$|\$ta$|\$sp$|\$pc$') + # TODO: Handle other kinds of store instructions. + PUSH_OPCODE_RE = re.compile(r'^push(\d{1,})$') + PUSH_OPERAND_RE = re.compile(r'^\$r\d{1,}, \#\d{1,} \! \{([^\]]+)\}') + SMW_OPCODE_RE = re.compile(r'^smw(\.\w\w|\.\w\w\w)$') + SMW_OPERAND_RE = re.compile(r'^(\$r\d{1,}|\$\w\p), \[\$\w\p\], ' + r'(\$r\d{1,}|\$\w\p), \#\d\w\d \! \{([^\]]+)\}') + OPERANDGROUP_RE = re.compile(r'^\$r\d{1,}\~\$r\d{1,}') + + LWI_OPCODE_RE = re.compile(r'^lwi(\.\w\w)$') + LWI_PC_OPERAND_RE = re.compile(r'^\$pc, \[([^\]]+)\]') + # Example: "34280: 3f c8 0f ec addi.gp $fp, #0xfec" + # Assume there is always a "\t" after the hex data. + DISASM_REGEX_RE = re.compile(r'^(?P<address>[0-9A-Fa-f]+):\s+' + r'(?P<words>[0-9A-Fa-f ]+)' + r'\t\s*(?P<opcode>\S+)(\s+(?P<operand>[^;]*))?') + + def ParseInstruction(self, line, function_end): + """Parse the line of instruction. + + Args: + line: Text of disassembly. + function_end: End address of the current function. None if unknown. + + Returns: + (address, words, opcode, operand_text): The instruction address, words, + opcode, and the text of operands. + None if it isn't an instruction line. + """ + result = self.DISASM_REGEX_RE.match(line) + if result is None: + return None + + address = int(result.group('address'), 16) + # Check if it's out of bound. + if function_end is not None and address >= function_end: + return None + + opcode = result.group('opcode').strip() + operand_text = result.group('operand') + words = result.group('words') + if operand_text is None: + operand_text = '' + else: + operand_text = operand_text.strip() + + return (address, words, opcode, operand_text) + + def AnalyzeFunction(self, function_symbol, instructions): + + stack_frame = 0 + callsites = [] + for address, words, opcode, operand_text in instructions: + is_jump_opcode = self.JUMP_OPCODE_RE.match(opcode) is not None + is_call_opcode = self.CALL_OPCODE_RE.match(opcode) is not None + + if is_jump_opcode or is_call_opcode: + is_tail = is_jump_opcode + + result = self.CALL_OPERAND_RE.match(operand_text) + + if result is None: + if (self.INDIRECT_CALL_OPERAND_RE.match(operand_text) is not None): + # Found an indirect call. + callsites.append(Callsite(address, None, is_tail)) + + else: + target_address = int(result.group(1), 16) + # Filter out the in-function target (branches and in-function calls, + # which are actually branches). + if not (function_symbol.size > 0 and + function_symbol.address < target_address < + (function_symbol.address + function_symbol.size)): + # Maybe it is a callsite. + callsites.append(Callsite(address, target_address, is_tail)) + + elif self.LWI_OPCODE_RE.match(opcode) is not None: + result = self.LWI_PC_OPERAND_RE.match(operand_text) + if result is not None: + # Ignore "lwi $pc, [$sp], xx" because it's usually a return. + if result.group(1) != '$sp': + # Found an indirect call. + callsites.append(Callsite(address, None, True)) + + elif self.PUSH_OPCODE_RE.match(opcode) is not None: + # Example: fc 20 push25 $r8, #0 ! {$r6~$r8, $fp, $gp, $lp} + if self.PUSH_OPERAND_RE.match(operand_text) is not None: + # capture fc 20 + imm5u = int(words.split(' ')[1], 16) + # sp = sp - (imm5u << 3) + imm8u = (imm5u<<3) & 0xff + stack_frame += imm8u + + result = self.PUSH_OPERAND_RE.match(operand_text) + operandgroup_text = result.group(1) + # capture $rx~$ry + if self.OPERANDGROUP_RE.match(operandgroup_text) is not None: + # capture number & transfer string to integer + oprandgrouphead = operandgroup_text.split(',')[0] + rx=int(filter(str.isdigit, oprandgrouphead.split('~')[0])) + ry=int(filter(str.isdigit, oprandgrouphead.split('~')[1])) + + stack_frame += ((len(operandgroup_text.split(','))+ry-rx) * + self.GENERAL_PURPOSE_REGISTER_SIZE) + else: + stack_frame += (len(operandgroup_text.split(',')) * + self.GENERAL_PURPOSE_REGISTER_SIZE) + + elif self.SMW_OPCODE_RE.match(opcode) is not None: + # Example: smw.adm $r6, [$sp], $r10, #0x2 ! {$r6~$r10, $lp} + if self.SMW_OPERAND_RE.match(operand_text) is not None: + result = self.SMW_OPERAND_RE.match(operand_text) + operandgroup_text = result.group(3) + # capture $rx~$ry + if self.OPERANDGROUP_RE.match(operandgroup_text) is not None: + # capture number & transfer string to integer + oprandgrouphead = operandgroup_text.split(',')[0] + rx=int(filter(str.isdigit, oprandgrouphead.split('~')[0])) + ry=int(filter(str.isdigit, oprandgrouphead.split('~')[1])) + + stack_frame += ((len(operandgroup_text.split(','))+ry-rx) * + self.GENERAL_PURPOSE_REGISTER_SIZE) + else: + stack_frame += (len(operandgroup_text.split(',')) * + self.GENERAL_PURPOSE_REGISTER_SIZE) + + return (stack_frame, callsites) class ArmAnalyzer(object): """Disassembly analyzer for ARM architecture. @@ -288,6 +439,40 @@ class ArmAnalyzer(object): # Stack subtraction instructions. SUB_OPCODE_RE = re.compile(r'^sub(s|w)?(\.\w)?$') SUB_OPERAND_RE = re.compile(r'^sp[^#]+#(\d+)') + # Example: "44d94: f893 0068 ldrb.w r0, [r3, #104] ; 0x68" + # Assume there is always a "\t" after the hex data. + DISASM_REGEX_RE = re.compile(r'^(?P<address>[0-9A-Fa-f]+):\s+[0-9A-Fa-f ]+' + r'\t\s*(?P<opcode>\S+)(\s+(?P<operand>[^;]*))?') + + def ParseInstruction(self, line, function_end): + """Parse the line of instruction. + + Args: + line: Text of disassembly. + function_end: End address of the current function. None if unknown. + + Returns: + (address, opcode, operand_text): The instruction address, opcode, + and the text of operands. None if it + isn't an instruction line. + """ + result = self.DISASM_REGEX_RE.match(line) + if result is None: + return None + + address = int(result.group('address'), 16) + # Check if it's out of bound. + if function_end is not None and address >= function_end: + return None + + opcode = result.group('opcode').strip() + operand_text = result.group('operand') + if operand_text is None: + operand_text = '' + else: + operand_text = operand_text.strip() + + return (address, opcode, operand_text) def AnalyzeFunction(self, function_symbol, instructions): """Analyze function, resolve the size of stack frame and callsites. @@ -469,16 +654,18 @@ class StackAnalyzer(object): Returns: function_map: Dict of functions. """ - # TODO(cheyuw): Select analyzer based on architecture. - analyzer = ArmAnalyzer() + disasm_lines = [line.strip() for line in disasm_text.splitlines()] + + if (disasm_lines[1].find("nds") != -1): + analyzer = AndesAnalyzer() + elif (disasm_lines[1].find("arm") != -1): + analyzer = ArmAnalyzer() + else: + raise StackAnalyzerError('Unsupported architecture.') # Example: "08028c8c <motion_lid_calc>:" function_signature_regex = re.compile( r'^(?P<address>[0-9A-Fa-f]+)\s+<(?P<name>[^>]+)>:$') - # Example: "44d94: f893 0068 ldrb.w r0, [r3, #104] ; 0x68" - # Assume there is always a "\t" after the hex data. - disasm_regex = re.compile(r'^(?P<address>[0-9A-Fa-f]+):\s+[0-9A-Fa-f ]+' - r'\t\s*(?P<opcode>\S+)(\s+(?P<operand>[^;]*))?') def DetectFunctionHead(line): """Check if the line is a function head. @@ -502,36 +689,6 @@ class StackAnalyzer(object): return symbol - def ParseInstruction(line, function_end): - """Parse the line of instruction. - - Args: - line: Text of disassembly. - function_end: End address of the current function. None if unknown. - - Returns: - (address, opcode, operand_text): The instruction address, opcode, - and the text of operands. None if it - isn't an instruction line. - """ - result = disasm_regex.match(line) - if result is None: - return None - - address = int(result.group('address'), 16) - # Check if it's out of bound. - if function_end is not None and address >= function_end: - return None - - opcode = result.group('opcode').strip() - operand_text = result.group('operand') - if operand_text is None: - operand_text = '' - else: - operand_text = operand_text.strip() - - return (address, opcode, operand_text) - # Build symbol map, indexed by symbol address. symbol_map = {} for symbol in self.symbols: @@ -558,7 +715,6 @@ class StackAnalyzer(object): instructions = [] # Remove heading and tailing spaces for each line. - disasm_lines = [line.strip() for line in disasm_text.splitlines()] line_index = 0 while line_index < len(disasm_lines): # Get the current line. @@ -584,7 +740,7 @@ class StackAnalyzer(object): else: # Step 2: Parse the function body. - instruction = ParseInstruction(line, function_end) + instruction = analyzer.ParseInstruction(line, function_end) if instruction is not None: instructions.append(instruction) |