diff options
author | Ofir Bitton <obitton@habana.ai> | 2021-06-02 11:56:31 +0300 |
---|---|---|
committer | Oded Gabbay <ogabbay@kernel.org> | 2021-06-18 15:23:41 +0300 |
commit | 254fac6d1a73aac40aa4d423c993965987728040 (patch) | |
tree | de2cedf541b3e34ff1344ed8bf01708de1ca3e4a /drivers/misc/habanalabs/include | |
parent | a39725819c816c87c6b4eeca4c10197a41e2a928 (diff) | |
download | linux-254fac6d1a73aac40aa4d423c993965987728040.tar.gz |
habanalabs/gaudi: add FW alive event support
In order for driver to be aware of process or thread crashes inside
GAUDI's CPU, we introduce a new event which contains all relevant
information. Upon event reception, driver will dump information and
will reset the device.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/misc/habanalabs/include')
3 files changed, 17 insertions, 1 deletions
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index c7da62243619..d4dc189a6c92 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -84,6 +84,20 @@ struct hl_eq_sm_sei_data { __u8 pad[3]; }; +enum hl_fw_alive_severity { + FW_ALIVE_SEVERITY_MINOR, + FW_ALIVE_SEVERITY_CRITICAL +}; + +struct hl_eq_fw_alive { + __le64 uptime_seconds; + __le32 process_id; + __le32 thread_id; + /* enum hl_fw_alive_severity */ + __u8 severity; + __u8 pad[7]; +}; + struct hl_eq_entry { struct hl_eq_header hdr; union { @@ -91,6 +105,7 @@ struct hl_eq_entry { struct hl_eq_hbm_ecc_data hbm_ecc_data; struct hl_eq_sm_sei_data sm_sei_data; struct cpucp_pkt_sync_err pkt_sync_err; + struct hl_eq_fw_alive fw_alive; __le64 data[7]; }; }; diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h index e8651abf84f2..f66c759952e4 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h @@ -303,6 +303,7 @@ enum gaudi_async_event_id { GAUDI_EVENT_NIC3_QP1 = 619, GAUDI_EVENT_NIC4_QP0 = 620, GAUDI_EVENT_NIC4_QP1 = 621, + GAUDI_EVENT_FW_ALIVE_S = 645, GAUDI_EVENT_DEV_RESET_REQ = 646, GAUDI_EVENT_PKT_QUEUE_OUT_SYNC = 647, GAUDI_EVENT_FIX_POWER_ENV_S = 658, diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h index 3dc79c131805..e87554ab0102 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h @@ -669,7 +669,7 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = { { .fc_id = 642, .cpu_id = 491, .valid = 0, .name = "" }, { .fc_id = 643, .cpu_id = 492, .valid = 0, .name = "" }, { .fc_id = 644, .cpu_id = 493, .valid = 0, .name = "" }, - { .fc_id = 645, .cpu_id = 494, .valid = 0, .name = "" }, + { .fc_id = 645, .cpu_id = 494, .valid = 1, .name = "FW_ALIVE_S" }, { .fc_id = 646, .cpu_id = 495, .valid = 1, .name = "DEV_RESET_REQ" }, { .fc_id = 647, .cpu_id = 496, .valid = 1, .name = "PKT_QUEUE_OUT_SYNC" }, |