From 8e9445a678cab4478aa90a5f8898ba093371e30f Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Wed, 10 Feb 2021 14:29:33 +0200 Subject: [PATCH] habanalabs: add reset support when user closes FD In order to support command submissions that are done directly from user space, the driver must perform a soft reset once the user closes its FD. If the soft reset fails or the device is not idle, a hard reset should be performed. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 20 ++++++++++++++++++-- drivers/misc/habanalabs/common/habanalabs.h | 2 ++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 334009e83823..c74bdf4ae6aa 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -103,8 +103,24 @@ static int hl_device_release(struct inode *inode, struct file *filp) return 0; } - hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); - hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); + hl_cb_mgr_fini(hdev, &hpriv->cb_mgr); + hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); + + if (hdev->reset_upon_device_release) { + u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; + + /* We try soft reset first */ + hl_device_reset(hdev, false, false); + + /* If device is not idle perform hard reset */ + if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, + HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { + dev_info(hdev->dev, + "device is not idle (mask %#llx %#llx) after soft reset, performing hard reset", + idle_mask[0], idle_mask[1]); + hl_device_reset(hdev, true, false); + } + } hl_hpriv_put(hpriv); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 4b321e4f8059..4fdb4fa5728d 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1920,6 +1920,7 @@ struct hl_mmu_funcs { * 
@device_fini_pending: true if device_fini was called and might be * waiting for the reset thread to finish * @supports_staged_submission: true if staged submissions are supported + * @reset_upon_device_release: true if reset is required upon device release */ struct hl_device { struct pci_dev *pdev; @@ -2026,6 +2027,7 @@ struct hl_device { u8 process_kill_trial_cnt; u8 device_fini_pending; u8 supports_staged_submission; + u8 reset_upon_device_release; /* Parameters for bring-up */ u64 nic_ports_mask;