drm/amdgpu: Correctly use bo_va->ref_count in compute VMs

This is needed to correctly handle BOs imported into a compute VM from gfx.
Both kfd and gfx should use the same bo_va and set bo_va->ref_count correctly
when mapping the BOs into the same VM; otherwise we may trigger a kernel general
protection fault when iterating mappings over the bo_va's valids or invalids list.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Xiaogang Chen <Xiaogang.Chen@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
Tested-by: Xiaogang Chen <Xiaogang.Chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Xiaogang Chen 2023-10-12 15:48:06 -05:00 committed by Alex Deucher
parent f20f3b0d6c
commit ffa88b0019

View File

@ -834,6 +834,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
uint64_t va = mem->va;
struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
struct amdgpu_bo *bo[2] = {NULL, NULL};
struct amdgpu_bo_va *bo_va;
bool same_hive = false;
int i, ret;
@ -921,7 +922,12 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
pr_debug("Unable to reserve BO during memory attach");
goto unwind;
}
attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
bo_va = amdgpu_vm_bo_find(vm, bo[i]);
if (!bo_va)
bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
else
++bo_va->ref_count;
attachment[i]->bo_va = bo_va;
amdgpu_bo_unreserve(bo[i]);
if (unlikely(!attachment[i]->bo_va)) {
ret = -ENOMEM;
@ -945,7 +951,8 @@ unwind:
continue;
if (attachment[i]->bo_va) {
amdgpu_bo_reserve(bo[i], true);
amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
if (--attachment[i]->bo_va->ref_count == 0)
amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
amdgpu_bo_unreserve(bo[i]);
list_del(&attachment[i]->list);
}
@ -962,7 +969,8 @@ static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
pr_debug("\t remove VA 0x%llx in entry %p\n",
attachment->va, attachment);
amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
if (--attachment->bo_va->ref_count == 0)
amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
drm_gem_object_put(&bo->tbo.base);
list_del(&attachment->list);
kfree(attachment);