drm/msm/gpu: Skip retired submits in recover worker

If we somehow raced with the submit retiring, either while waiting for the
worker to have a chance to run or while acquiring the gpu lock, then the
recover worker should just bail.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Patchwork: https://patchwork.freedesktop.org/patch/568034/
This commit is contained in:
Rob Clark 2023-11-17 07:24:28 -08:00
parent 4bea53b9c7
commit 12578c075f

View File

@ -365,7 +365,14 @@ static void recover_worker(struct kthread_work *work)
DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name); DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name);
submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1); submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
if (submit) {
/*
* If the submit retired while we were waiting for the worker to run,
* or waiting to acquire the gpu lock, then nothing more to do.
*/
if (!submit)
goto out_unlock;
/* Increment the fault counts */ /* Increment the fault counts */
submit->queue->faults++; submit->queue->faults++;
if (submit->aspace) if (submit->aspace)
@ -380,15 +387,10 @@ static void recover_worker(struct kthread_work *work)
msm_rd_dump_submit(priv->hangrd, submit, msm_rd_dump_submit(priv->hangrd, submit,
"offending task: %s (%s)", comm, cmd); "offending task: %s (%s)", comm, cmd);
} else { } else {
DRM_DEV_ERROR(dev->dev, "%s: offending task: unknown\n", gpu->name);
msm_rd_dump_submit(priv->hangrd, submit, NULL); msm_rd_dump_submit(priv->hangrd, submit, NULL);
} }
} else {
/*
* We couldn't attribute this fault to any particular context,
* so increment the global fault count instead.
*/
gpu->global_faults++;
}
/* Record the crash state */ /* Record the crash state */
pm_runtime_get_sync(&gpu->pdev->dev); pm_runtime_get_sync(&gpu->pdev->dev);
@ -440,6 +442,7 @@ static void recover_worker(struct kthread_work *work)
pm_runtime_put(&gpu->pdev->dev); pm_runtime_put(&gpu->pdev->dev);
out_unlock:
mutex_unlock(&gpu->lock); mutex_unlock(&gpu->lock);
msm_gpu_retire(gpu); msm_gpu_retire(gpu);