habanalabs/gaudi: sync stream add protection to SOB reset flow

Since the SOB reset flow was moved to a workqueue and is
no longer part of the fence release flow, we might reach a
scenario where a new context is created while we are in the middle
of resetting the SOB.
In such cases the reset may fail due to the idle check.
This will mess up the streams sync since the SOB value is invalid.
So we protect this area with a mutex, to delay context creation.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
farah kassabri 2021-03-30 16:38:02 +03:00 committed by Oded Gabbay
parent cf39395034
commit e65448faf4

View File

@ -5729,18 +5729,26 @@ release_cb:
static int gaudi_schedule_register_memset(struct hl_device *hdev,
u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
{
struct hl_ctx *ctx = hdev->compute_ctx;
struct hl_ctx *ctx;
struct hl_pending_cb *pending_cb;
struct packet_msg_long *pkt;
u32 cb_size, ctl;
struct hl_cb *cb;
int i;
int i, rc;
mutex_lock(&hdev->fpriv_list_lock);
ctx = hdev->compute_ctx;
/* If no compute context available or context is going down
* memset registers directly
*/
if (!ctx || kref_read(&ctx->refcount) == 0)
return gaudi_memset_registers(hdev, reg_base, num_regs, val);
if (!ctx || kref_read(&ctx->refcount) == 0) {
rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
mutex_unlock(&hdev->fpriv_list_lock);
return rc;
}
mutex_unlock(&hdev->fpriv_list_lock);
cb_size = (sizeof(*pkt) * num_regs) +
sizeof(struct packet_msg_prot) * 2;