habanalabs: force user to set device debug mode
This patch adds the implementation of the HL_DEBUG_OP_SET_MODE opcode in the DEBUG IOCTL. It forces a user who wants to debug the device to put the device into debug mode before configuring the debug engines. The patch also makes sure debug mode is disabled when the user releases the FD, in case the user forgot to disable it. Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
This commit is contained in:
parent
d1287493ab
commit
19734970c9
@ -31,7 +31,9 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
|
||||
* Coresight might be still working by accessing addresses
|
||||
* related to the stopped engines. Hence stop it explicitly.
|
||||
*/
|
||||
hdev->asic_funcs->halt_coresight(hdev);
|
||||
if (hdev->in_debug)
|
||||
hl_device_set_debug_mode(hdev, false);
|
||||
|
||||
hl_vm_ctx_fini(ctx);
|
||||
hl_asid_free(hdev, ctx->asid);
|
||||
}
|
||||
|
@ -231,6 +231,7 @@ static int device_early_init(struct hl_device *hdev)
|
||||
|
||||
mutex_init(&hdev->fd_open_cnt_lock);
|
||||
mutex_init(&hdev->send_cpu_message_lock);
|
||||
mutex_init(&hdev->debug_lock);
|
||||
mutex_init(&hdev->mmu_cache_lock);
|
||||
INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
|
||||
spin_lock_init(&hdev->hw_queues_mirror_lock);
|
||||
@ -262,6 +263,7 @@ early_fini:
|
||||
static void device_early_fini(struct hl_device *hdev)
|
||||
{
|
||||
mutex_destroy(&hdev->mmu_cache_lock);
|
||||
mutex_destroy(&hdev->debug_lock);
|
||||
mutex_destroy(&hdev->send_cpu_message_lock);
|
||||
|
||||
hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
|
||||
@ -420,6 +422,52 @@ int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * hl_device_set_debug_mode - put the device into, or take it out of, debug mode
 *
 * @hdev: pointer to habanalabs device structure
 * @enable: true to enter debug mode, false to leave it
 *
 * The whole transition runs under hdev->debug_lock. Entering debug mode
 * additionally takes hdev->fd_open_cnt_lock while fd_open_cnt is inspected
 * and in_debug is set, so the open-count check and the flag update happen
 * together with respect to anyone else holding that lock.
 *
 * Returns 0 on success, -EFAULT if the device is already in the requested
 * state, -EPERM if debug mode is requested while fd_open_cnt is above 1.
 */
int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
{
	int rc = 0;

	mutex_lock(&hdev->debug_lock);

	if (enable) {
		if (hdev->in_debug) {
			dev_err(hdev->dev,
				"Failed to enable debug mode because device is already in debug mode\n");
			rc = -EFAULT;
		} else {
			mutex_lock(&hdev->fd_open_cnt_lock);

			if (atomic_read(&hdev->fd_open_cnt) > 1) {
				dev_err(hdev->dev,
					"Failed to enable debug mode. More then a single user is using the device\n");
				rc = -EPERM;
			} else {
				hdev->in_debug = 1;
			}

			mutex_unlock(&hdev->fd_open_cnt_lock);
		}
	} else if (hdev->in_debug) {
		/* Halt coresight through the ASIC callback before clearing the flag */
		hdev->asic_funcs->halt_coresight(hdev);
		hdev->in_debug = 0;
	} else {
		dev_err(hdev->dev,
			"Failed to disable debug mode because device was not in debug mode\n");
		rc = -EFAULT;
	}

	mutex_unlock(&hdev->debug_lock);

	return rc;
}
|
||||
|
||||
/*
|
||||
* hl_device_suspend - initiate device suspend
|
||||
*
|
||||
|
@ -1117,6 +1117,7 @@ struct hl_device_reset_work {
|
||||
* lock here so we can flush user processes which are opening
|
||||
* the device while we are trying to hard reset it
|
||||
* @send_cpu_message_lock: enforces only one message in KMD <-> ArmCP queue.
|
||||
* @debug_lock: protects critical section of setting debug mode for device
|
||||
* @asic_prop: ASIC specific immutable properties.
|
||||
* @asic_funcs: ASIC specific functions.
|
||||
* @asic_specific: ASIC specific information to use only from ASIC files.
|
||||
@ -1159,6 +1160,8 @@ struct hl_device_reset_work {
|
||||
* @mmu_enable: is MMU enabled.
|
||||
* @device_cpu_disabled: is the device CPU disabled (due to timeouts)
|
||||
* @dma_mask: the dma mask that was set for this device
|
||||
* @in_debug: is device under debug. This, together with fd_open_cnt, enforces
|
||||
* that only a single user is configuring the debug infrastructure.
|
||||
*/
|
||||
struct hl_device {
|
||||
struct pci_dev *pdev;
|
||||
@ -1188,6 +1191,7 @@ struct hl_device {
|
||||
/* TODO: remove fd_open_cnt_lock for multiple process support */
|
||||
struct mutex fd_open_cnt_lock;
|
||||
struct mutex send_cpu_message_lock;
|
||||
struct mutex debug_lock;
|
||||
struct asic_fixed_properties asic_prop;
|
||||
const struct hl_asic_funcs *asic_funcs;
|
||||
void *asic_specific;
|
||||
@ -1230,6 +1234,7 @@ struct hl_device {
|
||||
u8 init_done;
|
||||
u8 device_cpu_disabled;
|
||||
u8 dma_mask;
|
||||
u8 in_debug;
|
||||
|
||||
/* Parameters for bring-up */
|
||||
u8 mmu_enable;
|
||||
@ -1325,6 +1330,7 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
|
||||
int hl_device_open(struct inode *inode, struct file *filp);
|
||||
bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
|
||||
enum hl_device_status hl_device_status(struct hl_device *hdev);
|
||||
int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
|
||||
int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
|
||||
enum hl_asic_type asic_type, int minor);
|
||||
void destroy_hdev(struct hl_device *hdev);
|
||||
|
@ -105,6 +105,14 @@ int hl_device_open(struct inode *inode, struct file *filp)
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
if (hdev->in_debug) {
|
||||
dev_err_ratelimited(hdev->dev,
|
||||
"Can't open %s because it is being debugged by another user\n",
|
||||
dev_name(hdev->dev));
|
||||
mutex_unlock(&hdev->fd_open_cnt_lock);
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
if (atomic_read(&hdev->fd_open_cnt)) {
|
||||
dev_info_ratelimited(hdev->dev,
|
||||
"Can't open %s because another user is working on it\n",
|
||||
|
@ -254,10 +254,18 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
|
||||
case HL_DEBUG_OP_BMON:
|
||||
case HL_DEBUG_OP_SPMU:
|
||||
case HL_DEBUG_OP_TIMESTAMP:
|
||||
if (!hdev->in_debug) {
|
||||
dev_err(hdev->dev,
|
||||
"Rejecting debug configuration request because device not in debug mode\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
args->input_size =
|
||||
min(args->input_size, hl_debug_struct_size[args->op]);
|
||||
rc = debug_coresight(hdev, args);
|
||||
break;
|
||||
case HL_DEBUG_OP_SET_MODE:
|
||||
rc = hl_device_set_debug_mode(hdev, (bool) args->enable);
|
||||
break;
|
||||
default:
|
||||
dev_err(hdev->dev, "Invalid request %d\n", args->op);
|
||||
rc = -ENOTTY;
|
||||
|
Loading…
Reference in New Issue
Block a user