drm/amdkfd: Skip packet submission on fatal error
If a fatal error is detected, packet submission won't go through; return an error in such cases. Also, avoid waiting for the fence when a fatal error is detected. Signed-off-by: Lijo Lazar <lijo.lazar@amd.com> Reviewed-by: Asad Kamal <asad.kamal@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
1b6ef74b2b
commit
e1f6746f33
@ -742,6 +742,11 @@ void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
|
||||
amdgpu_device_flush_hdp(adev, NULL);
|
||||
}
|
||||
|
||||
/*
 * Report whether the device has latched a fatal error (FED) condition.
 *
 * Thin KFD-facing wrapper around the RAS core query; callers use this to
 * bail out of packet submission and fence waits that would never complete.
 */
bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
{
	bool fed_status = amdgpu_ras_get_fed_status(adev);

	return fed_status;
}
|
||||
|
||||
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
|
||||
enum amdgpu_ras_block block, bool reset)
|
||||
{
|
||||
|
@ -337,6 +337,7 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
|
||||
struct tile_config *config);
|
||||
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
|
||||
enum amdgpu_ras_block block, bool reset);
|
||||
bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);
|
||||
bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
|
||||
void amdgpu_amdkfd_block_mmu_notifications(void *p);
|
||||
int amdgpu_amdkfd_criu_resume(void *p);
|
||||
|
@ -1903,6 +1903,10 @@ int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm,
|
||||
uint64_t *fence_addr = dqm->fence_addr;
|
||||
|
||||
while (*fence_addr != fence_value) {
|
||||
/* Fatal err detected, this response won't come */
|
||||
if (amdgpu_amdkfd_is_fed(dqm->dev->adev))
|
||||
return -EIO;
|
||||
|
||||
if (time_after(jiffies, end_jiffies)) {
|
||||
dev_err(dev, "qcm fence wait loop timeout expired\n");
|
||||
/* In HWS case, this is used to halt the driver thread
|
||||
|
@ -286,7 +286,7 @@ err_no_space:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
void kq_submit_packet(struct kernel_queue *kq)
|
||||
int kq_submit_packet(struct kernel_queue *kq)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
int i;
|
||||
@ -298,6 +298,10 @@ void kq_submit_packet(struct kernel_queue *kq)
|
||||
}
|
||||
pr_debug("\n");
|
||||
#endif
|
||||
/* Fatal err detected, packet submission won't go through */
|
||||
if (amdgpu_amdkfd_is_fed(kq->dev->adev))
|
||||
return -EIO;
|
||||
|
||||
if (kq->dev->kfd->device_info.doorbell_size == 8) {
|
||||
*kq->wptr64_kernel = kq->pending_wptr64;
|
||||
write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
|
||||
@ -307,6 +311,8 @@ void kq_submit_packet(struct kernel_queue *kq)
|
||||
write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
|
||||
kq->pending_wptr);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kq_rollback_packet(struct kernel_queue *kq)
|
||||
|
@ -47,7 +47,7 @@
|
||||
int kq_acquire_packet_buffer(struct kernel_queue *kq,
|
||||
size_t packet_size_in_dwords,
|
||||
unsigned int **buffer_ptr);
|
||||
void kq_submit_packet(struct kernel_queue *kq);
|
||||
int kq_submit_packet(struct kernel_queue *kq);
|
||||
void kq_rollback_packet(struct kernel_queue *kq);
|
||||
|
||||
|
||||
|
@ -288,7 +288,7 @@ int pm_send_set_resources(struct packet_manager *pm,
|
||||
|
||||
retval = pm->pmf->set_resources(pm, buffer, res);
|
||||
if (!retval)
|
||||
kq_submit_packet(pm->priv_queue);
|
||||
retval = kq_submit_packet(pm->priv_queue);
|
||||
else
|
||||
kq_rollback_packet(pm->priv_queue);
|
||||
|
||||
@ -325,7 +325,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
|
||||
if (retval)
|
||||
goto fail_create_runlist;
|
||||
|
||||
kq_submit_packet(pm->priv_queue);
|
||||
retval = kq_submit_packet(pm->priv_queue);
|
||||
|
||||
mutex_unlock(&pm->lock);
|
||||
|
||||
@ -361,7 +361,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
|
||||
|
||||
retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
|
||||
if (!retval)
|
||||
kq_submit_packet(pm->priv_queue);
|
||||
retval = kq_submit_packet(pm->priv_queue);
|
||||
else
|
||||
kq_rollback_packet(pm->priv_queue);
|
||||
|
||||
@ -392,7 +392,7 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
|
||||
|
||||
retval = pm->pmf->set_grace_period(pm, buffer, grace_period);
|
||||
if (!retval)
|
||||
kq_submit_packet(pm->priv_queue);
|
||||
retval = kq_submit_packet(pm->priv_queue);
|
||||
else
|
||||
kq_rollback_packet(pm->priv_queue);
|
||||
}
|
||||
@ -421,7 +421,7 @@ int pm_send_unmap_queue(struct packet_manager *pm,
|
||||
|
||||
retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);
|
||||
if (!retval)
|
||||
kq_submit_packet(pm->priv_queue);
|
||||
retval = kq_submit_packet(pm->priv_queue);
|
||||
else
|
||||
kq_rollback_packet(pm->priv_queue);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user