drm/amdgpu: Workaround to skip kiq ring test during ras gpu recovery

This is workaround, kiq ring test failed in suspend stage when do ras
recovery.

Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Stanley.Yang 2023-10-17 21:49:09 +08:00 committed by Alex Deucher
parent 5509e59611
commit b1338a8e71

View File

@ -29,6 +29,7 @@
#include "amdgpu_rlc.h"
#include "amdgpu_ras.h"
#include "amdgpu_xcp.h"
#include "amdgpu_xgmi.h"
/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
@ -501,6 +502,9 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *kiq_ring = &kiq->ring;
struct amdgpu_hive_info *hive;
struct amdgpu_ras *ras;
int hive_ras_recovery = 0;
int i, r = 0;
int j;
@ -521,6 +525,23 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
RESET_QUEUES, 0, 0);
}
/**
* This is workaround: only skip kiq_ring test
* during ras recovery in suspend stage for gfx9.4.3
*/
hive = amdgpu_get_xgmi_hive(adev);
if (hive) {
hive_ras_recovery = atomic_read(&hive->ras_recovery);
amdgpu_put_xgmi_hive(hive);
}
ras = amdgpu_ras_get_context(adev);
if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) {
spin_unlock(&kiq->ring_lock);
return 0;
}
if (kiq_ring->sched.ready && !adev->job_hang)
r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);