drm/amdgpu: use doorbell mgr for kfd kernel doorbells
This patch:
- adds a doorbell BO to the KFD device structure.
- creates a doorbell page for KFD kernel usage.
- updates the kfd_get_kernel_doorbell and kfd_release_kernel_doorbell
  functions accordingly.

V2: Do not use a wrapper API; call amdgpu_bo_create_kernel directly (Alex)
V3:
- Move single variable declaration below (Christian)
- Add a to-do item to reuse the KGD kernel level doorbells for KFD for
  non-MES cases, instead of reserving one page (Felix)

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
76bd34786e
commit
c318666510
@ -455,8 +455,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
|
|||||||
atomic_set(&kfd->compute_profile, 0);
|
atomic_set(&kfd->compute_profile, 0);
|
||||||
|
|
||||||
mutex_init(&kfd->doorbell_mutex);
|
mutex_init(&kfd->doorbell_mutex);
|
||||||
memset(&kfd->doorbell_available_index, 0,
|
|
||||||
sizeof(kfd->doorbell_available_index));
|
|
||||||
|
|
||||||
ida_init(&kfd->doorbell_ida);
|
ida_init(&kfd->doorbell_ida);
|
||||||
|
|
||||||
|
@ -61,81 +61,46 @@ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
|
|||||||
/* Doorbell calculations for device init. */
|
/* Doorbell calculations for device init. */
|
||||||
int kfd_doorbell_init(struct kfd_dev *kfd)
|
int kfd_doorbell_init(struct kfd_dev *kfd)
|
||||||
{
|
{
|
||||||
size_t doorbell_start_offset;
|
int size = PAGE_SIZE;
|
||||||
size_t doorbell_aperture_size;
|
int r;
|
||||||
size_t doorbell_process_limit;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* With MES enabled, just set the doorbell base as it is needed
|
* Todo: KFD kernel level operations need only one doorbell for
|
||||||
* to calculate doorbell physical address.
|
* ring test/HWS. So instead of reserving a whole page here for
|
||||||
|
* kernel, reserve and consume a doorbell from existing KGD kernel
|
||||||
|
* doorbell page.
|
||||||
*/
|
*/
|
||||||
if (kfd->shared_resources.enable_mes) {
|
|
||||||
kfd->doorbell_base =
|
/* Bitmap to dynamically allocate doorbells from kernel page */
|
||||||
kfd->shared_resources.doorbell_physical_address;
|
kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), GFP_KERNEL);
|
||||||
return 0;
|
if (!kfd->doorbell_bitmap) {
|
||||||
|
DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
|
||||||
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* Alloc a doorbell page for KFD kernel usages */
|
||||||
* We start with calculations in bytes because the input data might
|
r = amdgpu_bo_create_kernel(kfd->adev,
|
||||||
* only be byte-aligned.
|
size,
|
||||||
* Only after we have done the rounding can we assume any alignment.
|
PAGE_SIZE,
|
||||||
*/
|
AMDGPU_GEM_DOMAIN_DOORBELL,
|
||||||
|
&kfd->doorbells,
|
||||||
doorbell_start_offset =
|
NULL,
|
||||||
roundup(kfd->shared_resources.doorbell_start_offset,
|
(void **)&kfd->doorbell_kernel_ptr);
|
||||||
kfd_doorbell_process_slice(kfd));
|
if (r) {
|
||||||
|
pr_err("failed to allocate kernel doorbells\n");
|
||||||
doorbell_aperture_size =
|
bitmap_free(kfd->doorbell_bitmap);
|
||||||
rounddown(kfd->shared_resources.doorbell_aperture_size,
|
return r;
|
||||||
kfd_doorbell_process_slice(kfd));
|
}
|
||||||
|
|
||||||
if (doorbell_aperture_size > doorbell_start_offset)
|
|
||||||
doorbell_process_limit =
|
|
||||||
(doorbell_aperture_size - doorbell_start_offset) /
|
|
||||||
kfd_doorbell_process_slice(kfd);
|
|
||||||
else
|
|
||||||
return -ENOSPC;
|
|
||||||
|
|
||||||
if (!kfd->max_doorbell_slices ||
|
|
||||||
doorbell_process_limit < kfd->max_doorbell_slices)
|
|
||||||
kfd->max_doorbell_slices = doorbell_process_limit;
|
|
||||||
|
|
||||||
kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
|
|
||||||
doorbell_start_offset;
|
|
||||||
|
|
||||||
kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
|
|
||||||
|
|
||||||
kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
|
|
||||||
kfd_doorbell_process_slice(kfd));
|
|
||||||
|
|
||||||
if (!kfd->doorbell_kernel_ptr)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
pr_debug("Doorbell initialization:\n");
|
|
||||||
pr_debug("doorbell base == 0x%08lX\n",
|
|
||||||
(uintptr_t)kfd->doorbell_base);
|
|
||||||
|
|
||||||
pr_debug("doorbell_base_dw_offset == 0x%08lX\n",
|
|
||||||
kfd->doorbell_base_dw_offset);
|
|
||||||
|
|
||||||
pr_debug("doorbell_process_limit == 0x%08lX\n",
|
|
||||||
doorbell_process_limit);
|
|
||||||
|
|
||||||
pr_debug("doorbell_kernel_offset == 0x%08lX\n",
|
|
||||||
(uintptr_t)kfd->doorbell_base);
|
|
||||||
|
|
||||||
pr_debug("doorbell aperture size == 0x%08lX\n",
|
|
||||||
kfd->shared_resources.doorbell_aperture_size);
|
|
||||||
|
|
||||||
pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
|
|
||||||
|
|
||||||
|
pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void kfd_doorbell_fini(struct kfd_dev *kfd)
|
void kfd_doorbell_fini(struct kfd_dev *kfd)
|
||||||
{
|
{
|
||||||
if (kfd->doorbell_kernel_ptr)
|
bitmap_free(kfd->doorbell_bitmap);
|
||||||
iounmap(kfd->doorbell_kernel_ptr);
|
amdgpu_bo_free_kernel(&kfd->doorbells, NULL,
|
||||||
|
(void **)&kfd->doorbell_kernel_ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process,
|
int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process,
|
||||||
@ -188,22 +153,15 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
|
|||||||
u32 inx;
|
u32 inx;
|
||||||
|
|
||||||
mutex_lock(&kfd->doorbell_mutex);
|
mutex_lock(&kfd->doorbell_mutex);
|
||||||
inx = find_first_zero_bit(kfd->doorbell_available_index,
|
inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32));
|
||||||
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
|
|
||||||
|
|
||||||
__set_bit(inx, kfd->doorbell_available_index);
|
__set_bit(inx, kfd->doorbell_bitmap);
|
||||||
mutex_unlock(&kfd->doorbell_mutex);
|
mutex_unlock(&kfd->doorbell_mutex);
|
||||||
|
|
||||||
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
|
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
inx *= kfd->device_info.doorbell_size / sizeof(u32);
|
*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);
|
||||||
|
|
||||||
/*
|
|
||||||
* Calculating the kernel doorbell offset using the first
|
|
||||||
* doorbell page.
|
|
||||||
*/
|
|
||||||
*doorbell_off = kfd->doorbell_base_dw_offset + inx;
|
|
||||||
|
|
||||||
pr_debug("Get kernel queue doorbell\n"
|
pr_debug("Get kernel queue doorbell\n"
|
||||||
" doorbell offset == 0x%08X\n"
|
" doorbell offset == 0x%08X\n"
|
||||||
@ -217,11 +175,10 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
|
|||||||
{
|
{
|
||||||
unsigned int inx;
|
unsigned int inx;
|
||||||
|
|
||||||
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
|
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
|
||||||
* sizeof(u32) / kfd->device_info.doorbell_size;
|
|
||||||
|
|
||||||
mutex_lock(&kfd->doorbell_mutex);
|
mutex_lock(&kfd->doorbell_mutex);
|
||||||
__clear_bit(inx, kfd->doorbell_available_index);
|
__clear_bit(inx, kfd->doorbell_bitmap);
|
||||||
mutex_unlock(&kfd->doorbell_mutex);
|
mutex_unlock(&kfd->doorbell_mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -385,6 +385,12 @@ struct kfd_dev {
|
|||||||
/* Track per device allocated watch points */
|
/* Track per device allocated watch points */
|
||||||
uint32_t alloc_watch_ids;
|
uint32_t alloc_watch_ids;
|
||||||
spinlock_t watch_points_lock;
|
spinlock_t watch_points_lock;
|
||||||
|
|
||||||
|
/* Kernel doorbells for KFD device */
|
||||||
|
struct amdgpu_bo *doorbells;
|
||||||
|
|
||||||
|
/* bitmap for dynamic doorbell allocation from doorbell object */
|
||||||
|
unsigned long *doorbell_bitmap;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum kfd_mempool {
|
enum kfd_mempool {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user