drm/amdgpu: introduce vram lost for reset (v2)
For SOC15/Vega10, the BACO reset and the mode1 reset can cause VRAM to be lost in the high-end address range. The current KMD VRAM-lost check cannot catch this, since it only checks the very beginning of the visible frame buffer. v2: cover NV as well. Signed-off-by: Monk Liu <Monk.Liu@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
514ad79103
commit
e352625796
@ -1151,6 +1151,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
|
|||||||
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
|
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
|
||||||
#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
|
#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
|
||||||
#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
|
#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
|
||||||
|
/*
 * Atomically increment the device's vram_lost_counter.
 * The expansion deliberately carries no trailing semicolon: the caller
 * supplies it, so the macro behaves as a single statement and stays safe
 * inside an unbraced if/else.
 */
#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter))
|
||||||
|
|
||||||
/* Common functions */
|
/* Common functions */
|
||||||
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
|
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
|
||||||
|
@ -3483,7 +3483,7 @@ error:
|
|||||||
amdgpu_virt_init_data_exchange(adev);
|
amdgpu_virt_init_data_exchange(adev);
|
||||||
amdgpu_virt_release_full_gpu(adev, true);
|
amdgpu_virt_release_full_gpu(adev, true);
|
||||||
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
|
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
|
||||||
atomic_inc(&adev->vram_lost_counter);
|
amdgpu_inc_vram_lost(adev);
|
||||||
r = amdgpu_device_recover_vram(adev);
|
r = amdgpu_device_recover_vram(adev);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3649,7 +3649,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
|
|||||||
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
|
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
|
||||||
if (vram_lost) {
|
if (vram_lost) {
|
||||||
DRM_INFO("VRAM is lost due to GPU reset!\n");
|
DRM_INFO("VRAM is lost due to GPU reset!\n");
|
||||||
atomic_inc(&tmp_adev->vram_lost_counter);
|
amdgpu_inc_vram_lost(tmp_adev);
|
||||||
}
|
}
|
||||||
|
|
||||||
r = amdgpu_gtt_mgr_recover(
|
r = amdgpu_gtt_mgr_recover(
|
||||||
|
@ -316,10 +316,13 @@ static int nv_asic_reset(struct amdgpu_device *adev)
|
|||||||
int ret = 0;
|
int ret = 0;
|
||||||
struct smu_context *smu = &adev->smu;
|
struct smu_context *smu = &adev->smu;
|
||||||
|
|
||||||
if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
|
if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
|
||||||
|
amdgpu_inc_vram_lost(adev);
|
||||||
ret = smu_baco_reset(smu);
|
ret = smu_baco_reset(smu);
|
||||||
else
|
} else {
|
||||||
|
amdgpu_inc_vram_lost(adev);
|
||||||
ret = nv_asic_mode1_reset(adev);
|
ret = nv_asic_mode1_reset(adev);
|
||||||
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -558,10 +558,12 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
|
|||||||
{
|
{
|
||||||
switch (soc15_asic_reset_method(adev)) {
|
switch (soc15_asic_reset_method(adev)) {
|
||||||
case AMD_RESET_METHOD_BACO:
|
case AMD_RESET_METHOD_BACO:
|
||||||
|
amdgpu_inc_vram_lost(adev);
|
||||||
return soc15_asic_baco_reset(adev);
|
return soc15_asic_baco_reset(adev);
|
||||||
case AMD_RESET_METHOD_MODE2:
|
case AMD_RESET_METHOD_MODE2:
|
||||||
return soc15_mode2_reset(adev);
|
return soc15_mode2_reset(adev);
|
||||||
default:
|
default:
|
||||||
|
amdgpu_inc_vram_lost(adev);
|
||||||
return soc15_asic_mode1_reset(adev);
|
return soc15_asic_mode1_reset(adev);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user