summaryrefslogtreecommitdiff
path: root/extra/stack_analyzer
diff options
context:
space:
mode:
Diffstat (limited to 'extra/stack_analyzer')
-rwxr-xr-xextra/stack_analyzer/stack_analyzer.py232
1 files changed, 194 insertions, 38 deletions
diff --git a/extra/stack_analyzer/stack_analyzer.py b/extra/stack_analyzer/stack_analyzer.py
index 329e4faa6b..1c6276f399 100755
--- a/extra/stack_analyzer/stack_analyzer.py
+++ b/extra/stack_analyzer/stack_analyzer.py
@@ -246,6 +246,157 @@ class Function(object):
return True
+class AndesAnalyzer(object):
+ """Disassembly analyzer for Andes architecture.
+
+ Public Methods:
+ AnalyzeFunction: Analyze stack frame and callsites of the function.
+ """
+
+ GENERAL_PURPOSE_REGISTER_SIZE = 4
+
+ # Possible condition code suffixes.
+ CONDITION_CODES = [ 'eq', 'eqz', 'gez', 'gtz', 'lez', 'ltz', 'ne', 'nez',
+ 'eqc', 'nec', 'nezs', 'nes', 'eqs']
+ CONDITION_CODES_RE = '({})'.format('|'.join(CONDITION_CODES))
+
+ IMM_ADDRESS_RE = r'([0-9A-Fa-f]+)\s+<([^>]+)>'
+ # Branch instructions.
+ JUMP_OPCODE_RE = re.compile(r'^(b{0}|j|jr|jr.|jrnez)(\d?|\d\d)$' \
+ .format(CONDITION_CODES_RE))
+ # Call instructions.
+ CALL_OPCODE_RE = re.compile \
+ (r'^(jal|jral|jral.|jralnez|beqzal|bltzal|bgezal)(\d)?$')
+ CALL_OPERAND_RE = re.compile(r'^{}$'.format(IMM_ADDRESS_RE))
+ # Ignore lp register because it's for return.
+ INDIRECT_CALL_OPERAND_RE = re.compile \
+ (r'^\$r\d{1,}$|\$fp$|\$gp$|\$ta$|\$sp$|\$pc$')
+ # TODO: Handle other kinds of store instructions.
+ PUSH_OPCODE_RE = re.compile(r'^push(\d{1,})$')
+ PUSH_OPERAND_RE = re.compile(r'^\$r\d{1,}, \#\d{1,} \! \{([^\]]+)\}')
+ SMW_OPCODE_RE = re.compile(r'^smw(\.\w\w|\.\w\w\w)$')
+ SMW_OPERAND_RE = re.compile(r'^(\$r\d{1,}|\$\w\p), \[\$\w\p\], '
+ r'(\$r\d{1,}|\$\w\p), \#\d\w\d \! \{([^\]]+)\}')
+ OPERANDGROUP_RE = re.compile(r'^\$r\d{1,}\~\$r\d{1,}')
+
+ LWI_OPCODE_RE = re.compile(r'^lwi(\.\w\w)$')
+ LWI_PC_OPERAND_RE = re.compile(r'^\$pc, \[([^\]]+)\]')
+ # Example: "34280: 3f c8 0f ec addi.gp $fp, #0xfec"
+ # Assume there is always a "\t" after the hex data.
+ DISASM_REGEX_RE = re.compile(r'^(?P<address>[0-9A-Fa-f]+):\s+'
+ r'(?P<words>[0-9A-Fa-f ]+)'
+ r'\t\s*(?P<opcode>\S+)(\s+(?P<operand>[^;]*))?')
+
+ def ParseInstruction(self, line, function_end):
+ """Parse the line of instruction.
+
+ Args:
+ line: Text of disassembly.
+ function_end: End address of the current function. None if unknown.
+
+ Returns:
+ (address, words, opcode, operand_text): The instruction address, words,
+ opcode, and the text of operands.
+ None if it isn't an instruction line.
+ """
+ result = self.DISASM_REGEX_RE.match(line)
+ if result is None:
+ return None
+
+ address = int(result.group('address'), 16)
+ # Check if it's out of bound.
+ if function_end is not None and address >= function_end:
+ return None
+
+ opcode = result.group('opcode').strip()
+ operand_text = result.group('operand')
+ words = result.group('words')
+ if operand_text is None:
+ operand_text = ''
+ else:
+ operand_text = operand_text.strip()
+
+ return (address, words, opcode, operand_text)
+
+ def AnalyzeFunction(self, function_symbol, instructions):
+
+ stack_frame = 0
+ callsites = []
+ for address, words, opcode, operand_text in instructions:
+ is_jump_opcode = self.JUMP_OPCODE_RE.match(opcode) is not None
+ is_call_opcode = self.CALL_OPCODE_RE.match(opcode) is not None
+
+ if is_jump_opcode or is_call_opcode:
+ is_tail = is_jump_opcode
+
+ result = self.CALL_OPERAND_RE.match(operand_text)
+
+ if result is None:
+ if (self.INDIRECT_CALL_OPERAND_RE.match(operand_text) is not None):
+ # Found an indirect call.
+ callsites.append(Callsite(address, None, is_tail))
+
+ else:
+ target_address = int(result.group(1), 16)
+ # Filter out the in-function target (branches and in-function calls,
+ # which are actually branches).
+ if not (function_symbol.size > 0 and
+ function_symbol.address < target_address <
+ (function_symbol.address + function_symbol.size)):
+ # Maybe it is a callsite.
+ callsites.append(Callsite(address, target_address, is_tail))
+
+ elif self.LWI_OPCODE_RE.match(opcode) is not None:
+ result = self.LWI_PC_OPERAND_RE.match(operand_text)
+ if result is not None:
+ # Ignore "lwi $pc, [$sp], xx" because it's usually a return.
+ if result.group(1) != '$sp':
+ # Found an indirect call.
+ callsites.append(Callsite(address, None, True))
+
+ elif self.PUSH_OPCODE_RE.match(opcode) is not None:
+ # Example: fc 20 push25 $r8, #0 ! {$r6~$r8, $fp, $gp, $lp}
+ if self.PUSH_OPERAND_RE.match(operand_text) is not None:
+ # capture fc 20
+ imm5u = int(words.split(' ')[1], 16)
+ # sp = sp - (imm5u << 3)
+ imm8u = (imm5u<<3) & 0xff
+ stack_frame += imm8u
+
+ result = self.PUSH_OPERAND_RE.match(operand_text)
+ operandgroup_text = result.group(1)
+ # capture $rx~$ry
+ if self.OPERANDGROUP_RE.match(operandgroup_text) is not None:
+ # capture number & transfer string to integer
+ oprandgrouphead = operandgroup_text.split(',')[0]
+ rx=int(filter(str.isdigit, oprandgrouphead.split('~')[0]))
+ ry=int(filter(str.isdigit, oprandgrouphead.split('~')[1]))
+
+ stack_frame += ((len(operandgroup_text.split(','))+ry-rx) *
+ self.GENERAL_PURPOSE_REGISTER_SIZE)
+ else:
+ stack_frame += (len(operandgroup_text.split(',')) *
+ self.GENERAL_PURPOSE_REGISTER_SIZE)
+
+ elif self.SMW_OPCODE_RE.match(opcode) is not None:
+ # Example: smw.adm $r6, [$sp], $r10, #0x2 ! {$r6~$r10, $lp}
+ if self.SMW_OPERAND_RE.match(operand_text) is not None:
+ result = self.SMW_OPERAND_RE.match(operand_text)
+ operandgroup_text = result.group(3)
+ # capture $rx~$ry
+ if self.OPERANDGROUP_RE.match(operandgroup_text) is not None:
+ # capture number & transfer string to integer
+ oprandgrouphead = operandgroup_text.split(',')[0]
+ rx=int(filter(str.isdigit, oprandgrouphead.split('~')[0]))
+ ry=int(filter(str.isdigit, oprandgrouphead.split('~')[1]))
+
+ stack_frame += ((len(operandgroup_text.split(','))+ry-rx) *
+ self.GENERAL_PURPOSE_REGISTER_SIZE)
+ else:
+ stack_frame += (len(operandgroup_text.split(',')) *
+ self.GENERAL_PURPOSE_REGISTER_SIZE)
+
+ return (stack_frame, callsites)
class ArmAnalyzer(object):
"""Disassembly analyzer for ARM architecture.
@@ -288,6 +439,40 @@ class ArmAnalyzer(object):
# Stack subtraction instructions.
SUB_OPCODE_RE = re.compile(r'^sub(s|w)?(\.\w)?$')
SUB_OPERAND_RE = re.compile(r'^sp[^#]+#(\d+)')
+ # Example: "44d94: f893 0068 ldrb.w r0, [r3, #104] ; 0x68"
+ # Assume there is always a "\t" after the hex data.
+ DISASM_REGEX_RE = re.compile(r'^(?P<address>[0-9A-Fa-f]+):\s+[0-9A-Fa-f ]+'
+ r'\t\s*(?P<opcode>\S+)(\s+(?P<operand>[^;]*))?')
+
+ def ParseInstruction(self, line, function_end):
+ """Parse the line of instruction.
+
+ Args:
+ line: Text of disassembly.
+ function_end: End address of the current function. None if unknown.
+
+ Returns:
+ (address, opcode, operand_text): The instruction address, opcode,
+ and the text of operands. None if it
+ isn't an instruction line.
+ """
+ result = self.DISASM_REGEX_RE.match(line)
+ if result is None:
+ return None
+
+ address = int(result.group('address'), 16)
+ # Check if it's out of bound.
+ if function_end is not None and address >= function_end:
+ return None
+
+ opcode = result.group('opcode').strip()
+ operand_text = result.group('operand')
+ if operand_text is None:
+ operand_text = ''
+ else:
+ operand_text = operand_text.strip()
+
+ return (address, opcode, operand_text)
def AnalyzeFunction(self, function_symbol, instructions):
"""Analyze function, resolve the size of stack frame and callsites.
@@ -469,16 +654,18 @@ class StackAnalyzer(object):
Returns:
function_map: Dict of functions.
"""
- # TODO(cheyuw): Select analyzer based on architecture.
- analyzer = ArmAnalyzer()
+ disasm_lines = [line.strip() for line in disasm_text.splitlines()]
+
+ if (disasm_lines[1].find("nds") != -1):
+ analyzer = AndesAnalyzer()
+ elif (disasm_lines[1].find("arm") != -1):
+ analyzer = ArmAnalyzer()
+ else:
+ raise StackAnalyzerError('Unsupported architecture.')
# Example: "08028c8c <motion_lid_calc>:"
function_signature_regex = re.compile(
r'^(?P<address>[0-9A-Fa-f]+)\s+<(?P<name>[^>]+)>:$')
- # Example: "44d94: f893 0068 ldrb.w r0, [r3, #104] ; 0x68"
- # Assume there is always a "\t" after the hex data.
- disasm_regex = re.compile(r'^(?P<address>[0-9A-Fa-f]+):\s+[0-9A-Fa-f ]+'
- r'\t\s*(?P<opcode>\S+)(\s+(?P<operand>[^;]*))?')
def DetectFunctionHead(line):
"""Check if the line is a function head.
@@ -502,36 +689,6 @@ class StackAnalyzer(object):
return symbol
- def ParseInstruction(line, function_end):
- """Parse the line of instruction.
-
- Args:
- line: Text of disassembly.
- function_end: End address of the current function. None if unknown.
-
- Returns:
- (address, opcode, operand_text): The instruction address, opcode,
- and the text of operands. None if it
- isn't an instruction line.
- """
- result = disasm_regex.match(line)
- if result is None:
- return None
-
- address = int(result.group('address'), 16)
- # Check if it's out of bound.
- if function_end is not None and address >= function_end:
- return None
-
- opcode = result.group('opcode').strip()
- operand_text = result.group('operand')
- if operand_text is None:
- operand_text = ''
- else:
- operand_text = operand_text.strip()
-
- return (address, opcode, operand_text)
-
# Build symbol map, indexed by symbol address.
symbol_map = {}
for symbol in self.symbols:
@@ -558,7 +715,6 @@ class StackAnalyzer(object):
instructions = []
# Remove heading and tailing spaces for each line.
- disasm_lines = [line.strip() for line in disasm_text.splitlines()]
line_index = 0
while line_index < len(disasm_lines):
# Get the current line.
@@ -584,7 +740,7 @@ class StackAnalyzer(object):
else:
# Step 2: Parse the function body.
- instruction = ParseInstruction(line, function_end)
+ instruction = analyzer.ParseInstruction(line, function_end)
if instruction is not None:
instructions.append(instruction)