habanalabs: clear non-released encapsulated signals
Reserved encapsulated signals which were not released hold the context refcount, leading to a failure when killing the user process on device reset or device fini. Add the release of these leftover signals to the CS roll-back process.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
@ -742,13 +742,11 @@ static void cs_do_release(struct kref *ref)
|
|||||||
*/
|
*/
|
||||||
if (hl_cs_cmpl->encaps_signals)
|
if (hl_cs_cmpl->encaps_signals)
|
||||||
kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount,
|
kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount,
|
||||||
hl_encaps_handle_do_release);
|
hl_encaps_release_handle_and_put_ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
|
if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) && cs->encaps_signals)
|
||||||
&& cs->encaps_signals)
|
kref_put(&cs->encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx);
|
||||||
kref_put(&cs->encaps_sig_hdl->refcount,
|
|
||||||
hl_encaps_handle_do_release);
|
|
||||||
|
|
||||||
out:
|
out:
|
||||||
/* Must be called before hl_ctx_put because inside we use ctx to get
|
/* Must be called before hl_ctx_put because inside we use ctx to get
|
||||||
@ -1011,6 +1009,34 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
|
|||||||
hl_complete_job(hdev, job);
|
hl_complete_job(hdev, job);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* release_reserved_encaps_signals() - release reserved encapsulated signals.
|
||||||
|
* @hdev: pointer to habanalabs device structure
|
||||||
|
*
|
||||||
|
* Release reserved encapsulated signals which weren't un-reserved, or for which a CS with
|
||||||
|
* encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back.
|
||||||
|
* For these signals, the refcount of the H/W SOB which was taken at the reservation must
|
||||||
|
* also be put.
|
||||||
|
*/
|
||||||
|
static void release_reserved_encaps_signals(struct hl_device *hdev)
|
||||||
|
{
|
||||||
|
struct hl_ctx *ctx = hl_get_compute_ctx(hdev);
|
||||||
|
struct hl_cs_encaps_sig_handle *handle;
|
||||||
|
struct hl_encaps_signals_mgr *mgr;
|
||||||
|
u32 id;
|
||||||
|
|
||||||
|
if (!ctx)
|
||||||
|
return;
|
||||||
|
|
||||||
|
mgr = &ctx->sig_mgr;
|
||||||
|
|
||||||
|
idr_for_each_entry(&mgr->handles, handle, id)
|
||||||
|
if (handle->cs_seq == ULLONG_MAX)
|
||||||
|
kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob_ctx);
|
||||||
|
|
||||||
|
hl_ctx_put(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
|
void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@ -1039,6 +1065,8 @@ void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
|
|||||||
}
|
}
|
||||||
|
|
||||||
force_complete_multi_cs(hdev);
|
force_complete_multi_cs(hdev);
|
||||||
|
|
||||||
|
release_reserved_encaps_signals(hdev);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -2001,6 +2029,8 @@ static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
|
|||||||
*/
|
*/
|
||||||
handle->pre_sob_val = prop->next_sob_val - handle->count;
|
handle->pre_sob_val = prop->next_sob_val - handle->count;
|
||||||
|
|
||||||
|
handle->cs_seq = ULLONG_MAX;
|
||||||
|
|
||||||
*signals_count = prop->next_sob_val;
|
*signals_count = prop->next_sob_val;
|
||||||
hdev->asic_funcs->hw_queues_unlock(hdev);
|
hdev->asic_funcs->hw_queues_unlock(hdev);
|
||||||
|
|
||||||
@ -2350,10 +2380,8 @@ put_cs:
|
|||||||
/* We finished with the CS in this function, so put the ref */
|
/* We finished with the CS in this function, so put the ref */
|
||||||
cs_put(cs);
|
cs_put(cs);
|
||||||
free_cs_chunk_array:
|
free_cs_chunk_array:
|
||||||
if (!wait_cs_submitted && cs_encaps_signals && handle_found &&
|
if (!wait_cs_submitted && cs_encaps_signals && handle_found && is_wait_cs)
|
||||||
is_wait_cs)
|
kref_put(&encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx);
|
||||||
kref_put(&encaps_sig_hdl->refcount,
|
|
||||||
hl_encaps_handle_do_release);
|
|
||||||
kfree(cs_chunk_array);
|
kfree(cs_chunk_array);
|
||||||
out:
|
out:
|
||||||
return rc;
|
return rc;
|
||||||
|
@ -9,37 +9,46 @@
|
|||||||
|
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
|
|
||||||
void hl_encaps_handle_do_release(struct kref *ref)
|
static void encaps_handle_do_release(struct hl_cs_encaps_sig_handle *handle, bool put_hw_sob,
|
||||||
|
bool put_ctx)
|
||||||
{
|
{
|
||||||
struct hl_cs_encaps_sig_handle *handle =
|
|
||||||
container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
|
|
||||||
struct hl_encaps_signals_mgr *mgr = &handle->ctx->sig_mgr;
|
struct hl_encaps_signals_mgr *mgr = &handle->ctx->sig_mgr;
|
||||||
|
|
||||||
|
if (put_hw_sob)
|
||||||
|
hw_sob_put(handle->hw_sob);
|
||||||
|
|
||||||
spin_lock(&mgr->lock);
|
spin_lock(&mgr->lock);
|
||||||
idr_remove(&mgr->handles, handle->id);
|
idr_remove(&mgr->handles, handle->id);
|
||||||
spin_unlock(&mgr->lock);
|
spin_unlock(&mgr->lock);
|
||||||
|
|
||||||
hl_ctx_put(handle->ctx);
|
if (put_ctx)
|
||||||
|
hl_ctx_put(handle->ctx);
|
||||||
|
|
||||||
kfree(handle);
|
kfree(handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void hl_encaps_handle_do_release_sob(struct kref *ref)
|
void hl_encaps_release_handle_and_put_ctx(struct kref *ref)
|
||||||
{
|
{
|
||||||
struct hl_cs_encaps_sig_handle *handle =
|
struct hl_cs_encaps_sig_handle *handle =
|
||||||
container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
|
container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
|
||||||
struct hl_encaps_signals_mgr *mgr = &handle->ctx->sig_mgr;
|
|
||||||
|
|
||||||
/* if we're here, then there was a signals reservation but cs with
|
encaps_handle_do_release(handle, false, true);
|
||||||
* encaps signals wasn't submitted, so need to put refcount
|
}
|
||||||
* to hw_sob taken at the reservation.
|
|
||||||
*/
|
|
||||||
hw_sob_put(handle->hw_sob);
|
|
||||||
|
|
||||||
spin_lock(&mgr->lock);
|
static void hl_encaps_release_handle_and_put_sob(struct kref *ref)
|
||||||
idr_remove(&mgr->handles, handle->id);
|
{
|
||||||
spin_unlock(&mgr->lock);
|
struct hl_cs_encaps_sig_handle *handle =
|
||||||
|
container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
|
||||||
|
|
||||||
kfree(handle);
|
encaps_handle_do_release(handle, true, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void hl_encaps_release_handle_and_put_sob_ctx(struct kref *ref)
|
||||||
|
{
|
||||||
|
struct hl_cs_encaps_sig_handle *handle =
|
||||||
|
container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
|
||||||
|
|
||||||
|
encaps_handle_do_release(handle, true, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void hl_encaps_sig_mgr_init(struct hl_encaps_signals_mgr *mgr)
|
static void hl_encaps_sig_mgr_init(struct hl_encaps_signals_mgr *mgr)
|
||||||
@ -48,8 +57,7 @@ static void hl_encaps_sig_mgr_init(struct hl_encaps_signals_mgr *mgr)
|
|||||||
idr_init(&mgr->handles);
|
idr_init(&mgr->handles);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void hl_encaps_sig_mgr_fini(struct hl_device *hdev,
|
static void hl_encaps_sig_mgr_fini(struct hl_device *hdev, struct hl_encaps_signals_mgr *mgr)
|
||||||
struct hl_encaps_signals_mgr *mgr)
|
|
||||||
{
|
{
|
||||||
struct hl_cs_encaps_sig_handle *handle;
|
struct hl_cs_encaps_sig_handle *handle;
|
||||||
struct idr *idp;
|
struct idr *idp;
|
||||||
@ -57,11 +65,14 @@ static void hl_encaps_sig_mgr_fini(struct hl_device *hdev,
|
|||||||
|
|
||||||
idp = &mgr->handles;
|
idp = &mgr->handles;
|
||||||
|
|
||||||
|
/* The IDR is expected to be empty at this stage, because any leftover signal should have
|
||||||
|
* been released as part of CS roll-back.
|
||||||
|
*/
|
||||||
if (!idr_is_empty(idp)) {
|
if (!idr_is_empty(idp)) {
|
||||||
dev_warn(hdev->dev, "device released while some encaps signals handles are still allocated\n");
|
dev_warn(hdev->dev,
|
||||||
|
"device released while some encaps signals handles are still allocated\n");
|
||||||
idr_for_each_entry(idp, handle, id)
|
idr_for_each_entry(idp, handle, id)
|
||||||
kref_put(&handle->refcount,
|
kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob);
|
||||||
hl_encaps_handle_do_release_sob);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
idr_destroy(&mgr->handles);
|
idr_destroy(&mgr->handles);
|
||||||
|
@ -3775,7 +3775,8 @@ void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *d
|
|||||||
|
|
||||||
void hw_sob_get(struct hl_hw_sob *hw_sob);
|
void hw_sob_get(struct hl_hw_sob *hw_sob);
|
||||||
void hw_sob_put(struct hl_hw_sob *hw_sob);
|
void hw_sob_put(struct hl_hw_sob *hw_sob);
|
||||||
void hl_encaps_handle_do_release(struct kref *ref);
|
void hl_encaps_release_handle_and_put_ctx(struct kref *ref);
|
||||||
|
void hl_encaps_release_handle_and_put_sob_ctx(struct kref *ref);
|
||||||
void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
|
void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
|
||||||
struct hl_cs *cs, struct hl_cs_job *job,
|
struct hl_cs *cs, struct hl_cs_job *job,
|
||||||
struct hl_cs_compl *cs_cmpl);
|
struct hl_cs_compl *cs_cmpl);
|
||||||
|
Reference in New Issue
Block a user