drm/amdgpu: add ring timeout information in devcoredump

Add ring-timeout-related information to the amdgpu
devcoredump file for debugging purposes.

During the GPU recovery process, the registered call
is triggered and adds the debug information to the data
file created by the devcoredump framework under the
directory /sys/class/devcoredump/devcdx/.

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Sunil Khatri 2024-03-01 17:35:35 +05:30 committed by Alex Deucher
parent 2bdebcb1e4
commit 5e592956cc
2 changed files with 15 additions and 0 deletions

View File

@ -196,6 +196,13 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
coredump->reset_task_info.process_name,
coredump->reset_task_info.pid);
if (coredump->ring) {
drm_printf(&p, "\nRing timed out details\n");
drm_printf(&p, "IP Type: %d Ring Name: %s\n",
coredump->ring->funcs->type,
coredump->ring->name);
}
if (coredump->reset_vram_lost)
drm_printf(&p, "VRAM is lost due to GPU reset!\n");
if (coredump->adev->reset_info.num_regs) {
@ -220,6 +227,8 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
{
struct amdgpu_coredump_info *coredump;
struct drm_device *dev = adev_to_drm(adev);
struct amdgpu_job *job = reset_context->job;
struct drm_sched_job *s_job;
coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
@ -241,6 +250,11 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
}
}
if (job) {
s_job = &job->base;
coredump->ring = to_amdgpu_ring(s_job->sched);
}
coredump->adev = adev;
ktime_get_ts64(&coredump->reset_time);

View File

@ -97,6 +97,7 @@ struct amdgpu_coredump_info {
struct amdgpu_task_info reset_task_info;
struct timespec64 reset_time;
bool reset_vram_lost;
struct amdgpu_ring *ring;
};
#endif