drm/amdkfd: make sure VM is ready for updating operations
When page table BOs have been evicted but not validated before the page tables are updated, the VM is still in the evicting state, so amdgpu_vm_update_range returns -EBUSY and restore_process_worker runs into a dead loop.

v2: Split the BO validation and page table update into two separate loops in amdgpu_amdkfd_gpuvm_restore_process_bos. (Felix)
  1. Validate BOs
  2. Validate VM (and DMABuf attachments)
  3. Update page tables for the BOs validated above

Fixes: 50661eb1a2c8 ("drm/amdgpu: Auto-validate DMABuf imports in compute VMs")
Signed-off-by: Lang Yu <Lang.Yu@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
e53a1713de
commit
81bf14519a
@ -2901,13 +2901,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
|
||||
|
||||
amdgpu_sync_create(&sync_obj);
|
||||
|
||||
/* Validate BOs and map them to GPUVM (update VM page tables). */
|
||||
/* Validate BOs managed by KFD */
|
||||
list_for_each_entry(mem, &process_info->kfd_bo_list,
|
||||
validate_list) {
|
||||
|
||||
struct amdgpu_bo *bo = mem->bo;
|
||||
uint32_t domain = mem->domain;
|
||||
struct kfd_mem_attachment *attachment;
|
||||
struct dma_resv_iter cursor;
|
||||
struct dma_fence *fence;
|
||||
|
||||
@ -2932,6 +2931,25 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
|
||||
goto validate_map_fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (failed_size)
|
||||
pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
|
||||
|
||||
/* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
|
||||
* validations above would invalidate DMABuf imports again.
|
||||
*/
|
||||
ret = process_validate_vms(process_info, &exec.ticket);
|
||||
if (ret) {
|
||||
pr_debug("Validating VMs failed, ret: %d\n", ret);
|
||||
goto validate_map_fail;
|
||||
}
|
||||
|
||||
/* Update mappings managed by KFD. */
|
||||
list_for_each_entry(mem, &process_info->kfd_bo_list,
|
||||
validate_list) {
|
||||
struct kfd_mem_attachment *attachment;
|
||||
|
||||
list_for_each_entry(attachment, &mem->attachments, list) {
|
||||
if (!attachment->is_mapped)
|
||||
continue;
|
||||
@ -2948,18 +2966,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
|
||||
}
|
||||
}
|
||||
|
||||
if (failed_size)
|
||||
pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
|
||||
|
||||
/* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
|
||||
* validations above would invalidate DMABuf imports again.
|
||||
*/
|
||||
ret = process_validate_vms(process_info, &exec.ticket);
|
||||
if (ret) {
|
||||
pr_debug("Validating VMs failed, ret: %d\n", ret);
|
||||
goto validate_map_fail;
|
||||
}
|
||||
|
||||
/* Update mappings not managed by KFD */
|
||||
list_for_each_entry(peer_vm, &process_info->vm_list_head,
|
||||
vm_list_node) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user