drm/amd/amdgpu: move inc gpu_reset_counter after drm_sched_stop
Move gpu_reset_counter after drm_sched_stop to avoid race condition caused by job submitted between reset_count +1 and drm_sched_stop. Signed-off-by: Jingwen Chen <Jingwen.Chen2@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
8f4828d0a1
commit
8f8c80f430
@ -4449,7 +4449,6 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
|
||||
down_write(&adev->reset_sem);
|
||||
}
|
||||
|
||||
atomic_inc(&adev->gpu_reset_counter);
|
||||
switch (amdgpu_asic_reset_method(adev)) {
|
||||
case AMD_RESET_METHOD_MODE1:
|
||||
adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
|
||||
@ -4710,6 +4709,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
if (need_emergency_restart)
|
||||
amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
|
||||
}
|
||||
atomic_inc(&tmp_adev->gpu_reset_counter);
|
||||
}
|
||||
|
||||
if (need_emergency_restart)
|
||||
@ -5052,6 +5052,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
|
||||
|
||||
drm_sched_stop(&ring->sched, NULL);
|
||||
}
|
||||
atomic_inc(&adev->gpu_reset_counter);
|
||||
return PCI_ERS_RESULT_NEED_RESET;
|
||||
case pci_channel_io_perm_failure:
|
||||
/* Permanent error, prepare for device removal */
|
||||
|
Loading…
x
Reference in New Issue
Block a user