author     Ricardo Quesada <ricardoq@google.com>    2022-10-13 14:33:53 -0700
committer  Chromeos LUCI <chromeos-scoped@luci-project-accounts.iam.gserviceaccount.com>    2022-10-14 21:46:41 +0000
commit     8f48250c7e82e6d64119a8efed4f79ac40d381c4 (patch)
tree       e176f9a6802119800a0c0faeb7ae018dd3e40879
parent     e27babd2d7d3c3d462bc33afd6695d7d1cae9185 (diff)
download   chrome-ec-8f48250c7e82e6d64119a8efed4f79ac40d381c4.tar.gz
util: add script useful to parse thousands of crash reports
This CL adds a script that is useful for parsing thousands of crash
reports, in particular watchdog crash reports where only the PC is
provided. By analyzing thousands of watchdog crash reports, chances are
that we find the function that triggers the watchdogs.

This CL should be used together with this other script:
https://source.corp.google.com/piper///depot/google3/experimental/users/ricardoq/crashpad/main.py
which lives in Google3 since it has to perform an SQL query.

BUG=None
TEST=crash_analyzer.py lite -m ~/tmp/rammus_193.map -f /tmp/dumps
     and it generated a valid report.
BRANCH=None

Change-Id: I954c8b89a01dd2b55c07e19111b3ba887b119215
Signed-off-by: ricardoq@chromium.org
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/ec/+/3953260
Reviewed-by: Jeremy Bettis <jbettis@chromium.org>
-rwxr-xr-x  util/crash_analyzer.py  269
1 file changed, 269 insertions, 0 deletions
diff --git a/util/crash_analyzer.py b/util/crash_analyzer.py
new file mode 100755
index 0000000000..6b38766049
--- /dev/null
+++ b/util/crash_analyzer.py
@@ -0,0 +1,269 @@
+#!/usr/bin/env python3
+# Copyright 2022 The ChromiumOS Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""EC Crash report analyzer"""
+
+import argparse
+import pathlib
+import re
+import sys
+
+# Regex tested here: https://regex101.com/r/K5S8cB/1
+# This regex has only been tested with Cortex-M0+ crash reports.
+# TODO(b/253492108): Add regexps for the missing architectures.
+_REGEX_CORTEX_M0 = (
+    r"^Saved.*$\n=== PROCESS EXCEPTION: (.*) ====== xPSR: (.*) ===$\n"
+    r"r0 :(.*) r1 :(.*) r2 :(.*) r3 :(.*)$\n"
+    r"r4 :(.*) r5 :(.*) r6 :(.*) r7 :(.*)$\n"
+    r"r8 :(.*) r9 :(.*) r10:(.*) r11:(.*)$\n"
+    r"r12:(.*) sp :(.*) lr :(.*) pc :(.*)$\n"
+    r"\n"
+    r"^cfsr=(.*), shcsr=(.*), hfsr=(.*), dfsr=(.*), ipsr=(.*)$"
+)
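+# The regex captures 23 groups, in this order: the PROCESS EXCEPTION value,
+# xPSR, r0-r12, sp, lr, pc, cfsr, shcsr, hfsr, dfsr and ipsr.
+# process_crash_file() below relies on this ordering when indexing the match.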
+_symbols = []
+_entries = []
+
+
+def read_map_file(map_file):
+    """Reads the map file and populates the _symbols list with (address, name) tuples"""
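+    # Expected input: the output of 'nm -n ec.RW.elf | grep " [tT] "' (see the
+    # examples in main() below), i.e. lines of the form
+    # "<hex address> <type> <symbol name>", sorted by ascending address as
+    # required by the binary search in get_symbol_bisec().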
+    lines = map_file.readlines()
+    for line in lines:
+        addr_str, _, name = line.split(" ")
+        addr = int(addr_str, 16)
+        _symbols.append((addr, name.strip()))
+
+
+def get_symbol_bisec(addr: int, low: int, high: int) -> str:
+    """Finds the symbol using binary search"""
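+    # _symbols must be sorted by ascending address (see read_map_file); the
+    # search returns the name of the last symbol whose address is <= addr.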
+    # Element not found.
+    if low > high:
+        return f"invalid address: {format(addr, '#x')}"
+
+    mid = (high + low) // 2
+
+    # Corner case for the last element.
+    if mid == len(_symbols) - 1:
+        if addr > _symbols[mid][0]:
+            return f"invalid address: {format(addr, '#x')}"
+        return _symbols[mid][1]
+
+    if _symbols[mid][0] <= addr < _symbols[mid + 1][0]:
+        symbol = _symbols[mid][1]
+        # "$t" marks the start of a sequence of Thumb instructions; when that
+        # happens, use the next symbol instead.
+        if symbol == "$t":
+            symbol = _symbols[mid + 1][1]
+        return symbol
+
+    if addr > _symbols[mid][0]:
+        return get_symbol_bisec(addr, mid + 1, high)
+    return get_symbol_bisec(addr, low, mid - 1)
+
+
+def get_symbol(addr: int) -> str:
+    """Returns the function name that corresponds to the given address"""
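+    # Example (hypothetical map contents and addresses):
+    #   get_symbol(0x100f5) -> "watchdog_period_event"
+    #   get_symbol(0xffffffff) -> "invalid address: 0xffffffff"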
+    symbol = get_symbol_bisec(addr, 0, len(_symbols) - 1)
+
+    # Symbols generated by the compiler to mark transitions in the code.
+    # If we hit one, just append the address to disambiguate.
+    if symbol in ("$a", "$d", "$c", "$t"):
+        symbol = f"{symbol}:{format(addr,'#x')}"
+    return symbol
+
+
+def process_log_file(file_name: str) -> str:
+    """Reads a .log file and extracts the FW version"""
+    try:
+        with open(file_name, "r") as log_file:
+            lines = log_file.readlines()
+            for line in lines:
+                # Searching for something like:
+                # ===ec_info===
+                # vendor | Nuvoton
+                # name | NPCX586G
+                # fw_version | rammus_v2.0.460-d1d2aeb01f
+                if line.startswith("fw_version"):
+                    _, value = line.split("|")
+                    return value.strip()
+    except FileNotFoundError:
+        return ".log file not found"
+    return "unknown fw version"
+
+
+def process_crash_file(file_name: str) -> dict:
+    """Process a single crash report, and convert it to a dictionary"""
+    regs = {}
+    with open(file_name, "r") as crash_file:
+        content = crash_file.read()
+        # TODO(b/253492108): This is hardcoded to Cortex-M0+ crash reports.
+        # New ones (RISC-V, NDS32, etc.) will be added on demand.
+        #
+        # Expecting something like:
+        # Saved panic data: (NEW)
+        # === PROCESS EXCEPTION: ff ====== xPSR: ffffffff ===
+        # r0 : r1 : r2 : r3 :
+        # r4 :dead6664 r5 :10092632 r6 :00000000 r7 :00000000
+        # r8 :00000000 r9 :00000000 r10:00000000 r11:00000000
+        # r12: sp :00000000 lr : pc :
+        #
+        # cfsr=00000000, shcsr=00000000, hfsr=00000000, dfsr=00000000, ipsr=000000ff
+
+        match = re.match(_REGEX_CORTEX_M0, content, re.MULTILINE)
+        values = []
+        # Convert the values to numbers; use -1 for invalid/empty ones.
+        # Cannot use a list comprehension because of the possible invalid values.
+        if match is not None:
+            for i in match.groups():
+                try:
+                    val = int(i, 16)
+                except ValueError:
+                    # Value might be empty, so we must handle the exception.
+                    val = -1
+                values.append(val)
+
+            regs["exp"] = values[0]
+            regs["xPSR"] = values[1]
+            regs["regs"] = values[2:15]
+            regs["sp"] = values[15]
+            regs["lr"] = values[16]
+            regs["pc"] = values[17]
+            regs["cfsr"] = values[18]
+            regs["shcsr"] = values[19]
+            regs["hfsr"] = values[20]
+            regs["dfsr"] = values[21]
+            regs["ipsr"] = values[22]
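+            # Note: the symbol is resolved from r5 rather than pc; in the
+            # sample watchdog report above the pc field is empty while r5
+            # holds a code address.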
+            regs["symbol"] = get_symbol(regs["regs"][5])
+    return regs
+
+
+def process_crash_files(crash_folder):
+    """Process the crash reports that are in the crash_folder"""
+
+    processed = 0
+    for file in crash_folder.iterdir():
+        # The ".log" and ".upload_file_eccrash" files might not be listed in
+        # order. To avoid processing a report more than once, only look at the
+        # files with the ".upload_file_eccrash" extension and then read the
+        # matching ".log".
+        if file.suffix != ".upload_file_eccrash":
+            continue
+        entry = process_crash_file(file)
+        if len(entry) != 0:
+            fw_ver = process_log_file(file.parent.joinpath(file.stem + ".log"))
+            entry["fw_version"] = fw_ver
+            _entries.append(entry)
+            processed += 1
+    print(f"Processed: {processed}", file=sys.stderr)
+
+
+def cmd_report_lite(crash_folder):
+    """Generates a 'lite' report that only contains a few fields"""
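+    # Each entry becomes one line, e.g. (symbol name is illustrative):
+    # Task: 0xff - cause: 0xdead6664 - PC: watchdog_period_event - EC ver:rammus_v2.0.460-d1d2aeb01f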
+
+    process_crash_files(crash_folder)
+    for entry in _entries:
+        print(
+            f"Task: {format(entry['exp'],'#04x')} - "
+            f"cause: {format(entry['regs'][4], '#x')} - "
+            f"PC: {entry['symbol']} - "
+            f"EC ver:{entry['fw_version']}"
+        )
+
+
+def cmd_report_full(crash_folder):
+    """Generates a full report in .csv format"""
+
+    process_crash_files(crash_folder)
+    # Print the header.
+    print(
+        "Task,xPSR,r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,"
+        "sp,lr,pc,cfsr,shcsr,hfsr,dfsr,ipsr,symbol,fw_version"
+    )
+    for entry in _entries:
+        print(
+            f"{format(entry['exp'],'#04x')},{format(entry['xPSR'],'#x')}",
+            end="",
+        )
+        # r0-r12: 13 registers in total.
+        for i in range(13):
+            print(f",{format(entry['regs'][i],'#x')}", end="")
+        print(
+            f",{format(entry['sp'],'#x')}"
+            f",{format(entry['lr'],'#x')}"
+            f",{format(entry['pc'],'#x')}"
+            f",{format(entry['cfsr'],'#x')}"
+            f",{format(entry['shcsr'],'#x')}"
+            f",{format(entry['hfsr'],'#x')}"
+            f",{format(entry['dfsr'],'#x')}"
+            f",{format(entry['ipsr'],'#x')}"
+            f",\"{(entry['symbol'])}\""
+            f",\"{(entry['fw_version'])}\""
+        )
+
+
+def main(argv):
+    """Main entry point"""
+    example_text = """Example:
+# 1st:
+# Collect the crash reports using this script:
+# https://source.corp.google.com/piper///depot/google3/experimental/users/ricardoq/crashpad/main.py
+# MUST be run within a Google3 Workspace. E.g.:
+(google3) blaze run //experimental/users/ricardoq/crashpad:main -- --outdir=/tmp/dumps/ --limit=3000 --offset=15000 --hwclass=shyvana --milestone=105
+
+# 2nd:
+# Assuming that you don't have the .map file of the EC image, you can download the EC image from LUCI
+# and then parse the .elf file by doing:
+nm -n ec.RW.elf | grep " [tT] " > /tmp/rammus_193.map
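+# The resulting map contains one symbol per line, e.g. (illustrative):
+#   000100b8 T jump_to_image
+#   00010f26 t watchdog_period_event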
+
+# 3rd:
+# Run this script
+crash_analyzer.py full -m /tmp/rammus_193.map -f /tmp/dumps
+
+# Combine it with 'sort' and 'uniq' for better reports. E.g.:
+crash_analyzer.py lite -m /tmp/rammus_193.map -f /tmp/dumps | sort | uniq -c | less
+
+# Tip:
+# Start by analyzing the "lite" report. If a function catches your
+# attention, generate the "full" report and use Ghidra and/or IDA Pro to
+# analyze the different "PC" values that belong to the suspicious function.
+"""
+
+    parser = argparse.ArgumentParser(
+        prog="crash_analyzer",
+        epilog=example_text,
+        description="Processes crash reports and converts them to a human-friendly format.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument(
+        "-m",
+        "--map-file",
+        type=argparse.FileType("r"),
+        required=True,
+        metavar="ec_map_file",
+        help="/path/to/ec_image_map_file",
+    )
+    parser.add_argument(
+        "-f",
+        "--crash_folder",
+        type=pathlib.Path,
+        required=True,
+        help="Folder with the EC crash report files",
+    )
+    parser.add_argument(
+        "command", choices=["lite", "full"], help="Command to run."
+    )
+    args = parser.parse_args(argv)
+
+    # Needed for all commands.
+    read_map_file(args.map_file)
+
+    if args.command == "lite":
+        cmd_report_lite(args.crash_folder)
+    elif args.command == "full":
+        cmd_report_full(args.crash_folder)
+    else:
+        print(f"Unsupported command: {args.command}")
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])