drm/amdgpu: Conditionally reset RAS counters on boot

Only clear RAS error counters if perestent EDC harvesting is not supported

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: John Clements <john.clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
John Clements 2021-05-17 16:36:26 +08:00 committed by Alex Deucher
parent 8ef4f94add
commit 8f6368a9c9
4 changed files with 13 additions and 13 deletions

View File

@ -607,7 +607,6 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
struct ras_ih_if ih_info = {
.cb = amdgpu_gfx_process_ras_data_cb,
};
struct ras_query_if info = { 0 };
if (!adev->gfx.ras_if) {
adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
@ -625,12 +624,8 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
goto free;
if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
if (adev->gmc.xgmi.connected_to_cpu) {
info.head = *adev->gfx.ras_if;
amdgpu_ras_query_error_status(adev, &info);
} else {
if (!amdgpu_persistent_edc_harvesting_supported(adev))
amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
}
r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
if (r)

View File

@ -2194,7 +2194,7 @@ release_con:
return r;
}
static int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev)
int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev)
{
if (adev->gmc.xgmi.connected_to_cpu)
return 1;

View File

@ -625,4 +625,7 @@ void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready);
bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev);
void amdgpu_release_ras_context(struct amdgpu_device *adev);
int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev);
#endif

View File

@ -1268,13 +1268,15 @@ static int gmc_v9_0_late_init(void *handle)
}
}
if (adev->mmhub.ras_funcs &&
adev->mmhub.ras_funcs->reset_ras_error_count)
adev->mmhub.ras_funcs->reset_ras_error_count(adev);
if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
if (adev->mmhub.ras_funcs &&
adev->mmhub.ras_funcs->reset_ras_error_count)
adev->mmhub.ras_funcs->reset_ras_error_count(adev);
if (adev->hdp.ras_funcs &&
adev->hdp.ras_funcs->reset_ras_error_count)
adev->hdp.ras_funcs->reset_ras_error_count(adev);
if (adev->hdp.ras_funcs &&
adev->hdp.ras_funcs->reset_ras_error_count)
adev->hdp.ras_funcs->reset_ras_error_count(adev);
}
r = amdgpu_gmc_ras_late_init(adev);
if (r)