diff options
Diffstat (limited to 'include/uapi/misc/habanalabs.h')
-rw-r--r-- | include/uapi/misc/habanalabs.h | 166 |
1 files changed, 139 insertions, 27 deletions
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 00b309590499..371dfc4243b3 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -333,7 +333,18 @@ enum hl_server_type { * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency + * HL_INFO_POWER - Retrieve power information * HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls + * HL_INFO_DRAM_REPLACED_ROWS - Retrieve DRAM replaced rows info + * HL_INFO_DRAM_PENDING_ROWS - Retrieve DRAM pending rows num + * HL_INFO_LAST_ERR_OPEN_DEV_TIME - Retrieve timestamp of the last time the device was opened + * and CS timeout or razwi error occurred. + * HL_INFO_CS_TIMEOUT_EVENT - Retrieve CS timeout timestamp and its related CS sequence number. + * HL_INFO_RAZWI_EVENT - Retrieve parameters of razwi: + * Timestamp of razwi. + * The address which accessing it caused the razwi. + * Razwi initiator. + * Razwi cause, was it a page fault or MMU access error. */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -353,8 +364,13 @@ enum hl_server_type { #define HL_INFO_PLL_FREQUENCY 16 #define HL_INFO_POWER 17 #define HL_INFO_OPEN_STATS 18 +#define HL_INFO_DRAM_REPLACED_ROWS 21 +#define HL_INFO_DRAM_PENDING_ROWS 22 +#define HL_INFO_LAST_ERR_OPEN_DEV_TIME 23 +#define HL_INFO_CS_TIMEOUT_EVENT 24 +#define HL_INFO_RAZWI_EVENT 25 -#define HL_INFO_VERSION_MAX_LEN 128 +#define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 /** @@ -473,15 +489,27 @@ struct hl_info_pci_counters { __u64 replay_cnt; }; -#define HL_CLK_THROTTLE_POWER 0x1 -#define HL_CLK_THROTTLE_THERMAL 0x2 +enum hl_clk_throttling_type { + HL_CLK_THROTTLE_TYPE_POWER, + HL_CLK_THROTTLE_TYPE_THERMAL, + HL_CLK_THROTTLE_TYPE_MAX +}; + +/* clk_throttling_reason masks */ +#define HL_CLK_THROTTLE_POWER (1 << HL_CLK_THROTTLE_TYPE_POWER) +#define HL_CLK_THROTTLE_THERMAL (1 << HL_CLK_THROTTLE_TYPE_THERMAL) /** * struct hl_info_clk_throttle - clock throttling reason * @clk_throttling_reason: each bit represents a clk throttling reason + * @clk_throttling_timestamp_us: represents CPU timestamp in microseconds of the start-event + * @clk_throttling_duration_ns: the clock throttle time in nanosec */ struct hl_info_clk_throttle { __u32 clk_throttling_reason; + __u32 pad; + __u64 clk_throttling_timestamp_us[HL_CLK_THROTTLE_TYPE_MAX]; + __u64 clk_throttling_duration_ns[HL_CLK_THROTTLE_TYPE_MAX]; }; /** @@ -559,6 +587,51 @@ struct hl_info_cs_counters { __u64 ctx_validation_drop_cnt; }; +/** + * struct hl_info_last_err_open_dev_time - last error boot information. + * @timestamp: timestamp of last time the device was opened and error occurred. + */ +struct hl_info_last_err_open_dev_time { + __s64 timestamp; +}; + +/** + * struct hl_info_cs_timeout_event - last CS timeout information. + * @timestamp: timestamp when last CS timeout event occurred. + * @seq: sequence number of last CS timeout event. + */ +struct hl_info_cs_timeout_event { + __s64 timestamp; + __u64 seq; +}; + +#define HL_RAZWI_PAGE_FAULT 0 +#define HL_RAZWI_MMU_ACCESS_ERROR 1 + +/** + * struct hl_info_razwi_event - razwi information. + * @timestamp: timestamp of razwi. + * @addr: address which accessing it caused razwi. + * @engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does not + * have engine id it will be set to U16_MAX. + * @engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible + * engines which one them caused the razwi. In that case, it will contain the + * second possible engine id, otherwise it will be set to U16_MAX. + * @no_engine_id: if razwi initiator does not have engine id, this field will be set to 1, + * otherwise 0. + * @error_type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX. + * @pad: padding to 64 bit. + */ +struct hl_info_razwi_event { + __s64 timestamp; + __u64 addr; + __u16 engine_id_1; + __u16 engine_id_2; + __u8 no_engine_id; + __u8 error_type; + __u8 pad[2]; +}; + enum gaudi_dcores { HL_GAUDI_WS_DCORE, HL_GAUDI_WN_DCORE, @@ -607,7 +680,10 @@ struct hl_info_args { #define HL_MAX_CB_SIZE (0x200000 - 32) /* Indicates whether the command buffer should be mapped to the device's MMU */ -#define HL_CB_FLAGS_MAP 0x1 +#define HL_CB_FLAGS_MAP 0x1 + +/* Used with HL_CB_OP_INFO opcode to get the device va address for kernel mapped CB */ +#define HL_CB_FLAGS_GET_DEVICE_VA 0x2 struct hl_cb_in { /* Handle of CB or 0 if we want to create one */ @@ -629,11 +705,16 @@ struct hl_cb_out { /* Handle of CB */ __u64 cb_handle; - /* Information about CB */ - struct { - /* Usage count of CB */ - __u32 usage_cnt; - __u32 pad; + union { + /* Information about CB */ + struct { + /* Usage count of CB */ + __u32 usage_cnt; + __u32 pad; + }; + + /* CB mapped address to device MMU */ + __u64 device_va; }; }; }; @@ -856,9 +937,17 @@ struct hl_cs_out { /* * SOB base address offset - * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set + * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY or HL_CS_FLAGS_SIGNAL is set */ __u32 sob_base_addr_offset; + + /* + * Count of completed signals in SOB before current signal submission. + * Valid only when (HL_CS_FLAGS_ENCAP_SIGNALS & HL_CS_FLAGS_STAGED_SUBMISSION) + * or HL_CS_FLAGS_SIGNAL is set + */ + __u16 sob_count_before_submission; + __u16 pad[3]; }; union hl_cs_args { @@ -866,9 +955,10 @@ union hl_cs_args { struct hl_cs_out out; }; -#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 -#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 -#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 +#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 +#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 +#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 +#define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10 #define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32 @@ -888,14 +978,23 @@ struct hl_wait_cs_in { }; struct { - /* User address for completion comparison. - * upon interrupt, driver will compare the value pointed - * by this address with the supplied target value. - * in order not to perform any comparison, set address - * to all 1s. - * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set - */ - __u64 addr; + union { + /* User address for completion comparison. + * upon interrupt, driver will compare the value pointed + * by this address with the supplied target value. + * in order not to perform any comparison, set address + * to all 1s. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set + */ + __u64 addr; + + /* cq_counters_handle to a kernel mapped cb which contains + * cq counters. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set + */ + __u64 cq_counters_handle; + }; + /* Target value for completion comparison */ __u64 target; }; @@ -911,14 +1010,27 @@ struct hl_wait_cs_in { */ __u32 flags; - /* Multi CS API info- valid entries in multi-CS array */ - __u8 seq_arr_len; - __u8 pad[3]; + union { + struct { + /* Multi CS API info- valid entries in multi-CS array */ + __u8 seq_arr_len; + __u8 pad[7]; + }; + + /* Absolute timeout to wait for an interrupt in microseconds. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set + */ + __u64 interrupt_timeout_us; + }; - /* Absolute timeout to wait for an interrupt in microseconds. - * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set + /* + * cq counter offset inside the counters cb pointed by cq_counters_handle above. + * upon interrupt, driver will compare the value pointed + * by this address (cq_counters_handle + cq_counters_offset) + * with the supplied target value. + * relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set */ - __u32 interrupt_timeout_us; + __u64 cq_counters_offset; }; #define HL_WAIT_CS_STATUS_COMPLETED 0 |