From 647469148360dc873405acc6fcf63772e9e401f4 Mon Sep 17 00:00:00 2001 From: Tal Cohen Date: Thu, 12 May 2022 11:59:41 +0300 Subject: [PATCH] habanalabs: expose undefined opcode status via info ioctl The info ioctl retrieves information on the last undefined opcode occurred. Signed-off-by: Tal Cohen Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/common/habanalabs_ioctl.c | 25 ++++++++++++++++ include/uapi/misc/habanalabs.h | 30 +++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index c7864d6bb0a1..fe7ed46cd1c5 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -610,6 +610,28 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args) return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; } +static int undefined_opcode_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + u32 max_size = args->return_size; + struct hl_info_undefined_opcode_event info = {0}; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + info.timestamp = ktime_to_ns(hdev->last_error.undef_opcode.timestamp); + info.engine_id = hdev->last_error.undef_opcode.engine_id; + info.cq_addr = hdev->last_error.undef_opcode.cq_addr; + info.cq_size = hdev->last_error.undef_opcode.cq_size; + info.stream_id = hdev->last_error.undef_opcode.stream_id; + info.cb_addr_streams_len = hdev->last_error.undef_opcode.cb_addr_streams_len; + memcpy(info.cb_addr_streams, hdev->last_error.undef_opcode.cb_addr_streams, + sizeof(info.cb_addr_streams)); + + return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; +} + static int dev_mem_alloc_page_sizes_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { void __user *out = (void __user *) (uintptr_t) args->return_pointer; @@ -718,6 +740,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_RAZWI_EVENT: return razwi_info(hpriv, args); + case HL_INFO_UNDEFINED_OPCODE_EVENT: + return undefined_opcode_info(hpriv, args); + case HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES: return dev_mem_alloc_page_sizes_info(hpriv, args); diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index c94b89cf1ec1..5f9a6097f5f3 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -352,6 +352,7 @@ enum hl_server_type { * HL_INFO_REGISTER_EVENTFD - Register eventfd for event notifications. * HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd * HL_INFO_GET_EVENTS - Retrieve the last occurred events + * HL_INFO_UNDEFINED_OPCODE_EVENT - Retrieve last undefined opcode error information. */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -380,6 +381,7 @@ enum hl_server_type { #define HL_INFO_REGISTER_EVENTFD 28 #define HL_INFO_UNREGISTER_EVENTFD 29 #define HL_INFO_GET_EVENTS 30 +#define HL_INFO_UNDEFINED_OPCODE_EVENT 31 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 @@ -656,6 +658,34 @@ struct hl_info_razwi_event { __u8 pad[2]; }; +#define MAX_QMAN_STREAMS_INFO 4 +#define OPCODE_INFO_MAX_ADDR_SIZE 8 +/** + * struct hl_info_undefined_opcode_event - info about last undefined opcode error + * @timestamp: timestamp of the undefined opcode error + * @cb_addr_streams: CB addresses (per stream) that are currently exists in the PQ + * entiers. In case all streams array entries are + * filled with values, it means the execution was in Lower-CP. + * @cq_addr: the address of the current handled command buffer + * @cq_size: the size of the current handled command buffer + * @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array. + * should be equal to 1 incase of undefined opcode + * in Upper-CP (specific stream) and equal to 4 incase + * of undefined opcode in Lower-CP. + * @engine_id: engine-id that the error occurred on + * @stream_id: the stream id the error occurred on. In case the stream equals to + * MAX_QMAN_STREAMS_INFO it means the error occurred on a Lower-CP. + */ +struct hl_info_undefined_opcode_event { + __s64 timestamp; + __u64 cb_addr_streams[MAX_QMAN_STREAMS_INFO][OPCODE_INFO_MAX_ADDR_SIZE]; + __u64 cq_addr; + __u32 cq_size; + __u32 cb_addr_streams_len; + __u32 engine_id; + __u32 stream_id; +}; + /** * struct hl_info_dev_memalloc_page_sizes - valid page sizes in device mem alloc information. * @page_order_bitmask: bitmap in which a set bit represents the order of the supported page size