drm/msm/gpu: Track global faults per address-space
Other processes don't need to know about faults that they are isolated
from by virtue of address space isolation. They are only interested in
whether some of their state might have been corrupted.

But to be safe, also track unattributed faults. This case should really
never happen unless there is a kernel bug (and that would never happen,
right?)

v2: Instead of adding a new param, just change the behavior of the
    existing param to match what userspace actually wants [anholt]

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5934
Signed-off-by: Rob Clark <robdclark@chromium.org>
Link: https://lore.kernel.org/r/20220201161618.778455-3-robdclark@gmail.com
Reviewed-by: Emma Anholt <emma@anholt.net>
Signed-off-by: Rob Clark <robdclark@chromium.org>
This commit is contained in:
parent
f98f915b7e
commit
bc2112583a
@@ -269,7 +269,7 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
|
||||
*value = 0;
|
||||
return 0;
|
||||
case MSM_PARAM_FAULTS:
|
||||
*value = gpu->global_faults;
|
||||
*value = gpu->global_faults + ctx->aspace->faults;
|
||||
return 0;
|
||||
case MSM_PARAM_SUSPENDS:
|
||||
*value = gpu->suspend_count;
|
||||
|
@@ -35,6 +35,9 @@ struct msm_gem_address_space {
|
||||
* will be non-NULL:
|
||||
*/
|
||||
struct pid *pid;
|
||||
|
||||
/* @faults: the number of GPU hangs associated with this address space */
|
||||
int faults;
|
||||
};
|
||||
|
||||
struct msm_gem_vma {
|
||||
|
@@ -370,8 +370,8 @@ static void recover_worker(struct kthread_work *work)
|
||||
struct task_struct *task;
|
||||
|
||||
/* Increment the fault counts */
|
||||
gpu->global_faults++;
|
||||
submit->queue->faults++;
|
||||
submit->aspace->faults++;
|
||||
|
||||
task = get_pid_task(submit->pid, PIDTYPE_PID);
|
||||
if (task) {
|
||||
@@ -389,6 +389,12 @@ static void recover_worker(struct kthread_work *work)
|
||||
} else {
|
||||
msm_rd_dump_submit(priv->hangrd, submit, NULL);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* We couldn't attribute this fault to any particular context,
|
||||
* so increment the global fault count instead.
|
||||
*/
|
||||
gpu->global_faults++;
|
||||
}
|
||||
|
||||
/* Record the crash state */
|
||||
|
@@ -200,7 +200,10 @@ struct msm_gpu {
|
||||
/* does gpu need hw_init? */
|
||||
bool needs_hw_init;
|
||||
|
||||
/* number of GPU hangs (for all contexts) */
|
||||
/**
|
||||
* global_faults: number of GPU hangs not attributed to a particular
|
||||
* address space
|
||||
*/
|
||||
int global_faults;
|
||||
|
||||
void __iomem *mmio;
|
||||
|
Loading…
Reference in New Issue
Block a user