drm/amdgpu: Enable mode-1 reset for RAS recovery in fatal error mode

The patch is enabling mode-1 reset for RAS recovery in fatal error mode.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
YiPeng Chai 2022-11-08 17:11:18 +08:00 committed by Alex Deucher
parent 64a3dbb06a
commit 1a11a65d53
2 changed files with 10 additions and 1 deletions

View File

@ -4593,6 +4593,10 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
if (amdgpu_gpu_recovery == 0)
goto disabled;
/* Skip soft reset check in fatal error mode */
if (!amdgpu_ras_is_poison_mode_supported(adev))
return true;
if (!amdgpu_device_ip_check_soft_reset(adev)) {
dev_info(adev->dev,"Timeout, but no hardware hang detected.\n");
return false;

View File

@ -1948,7 +1948,12 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
/* Perform full reset in fatal error mode */
if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
else
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
}