drm/amdkfd: Make doorbell size ASIC-dependent
This prepares for GFXv9 (Vega10), which has 64-bit doorbells.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
This commit is contained in:
parent
642a0e8026
commit
ada2b29c4a
@ -41,6 +41,7 @@ static const struct kfd_device_info kaveri_device_info = {
|
|||||||
.max_pasid_bits = 16,
|
.max_pasid_bits = 16,
|
||||||
/* max num of queues for KV.TODO should be a dynamic value */
|
/* max num of queues for KV.TODO should be a dynamic value */
|
||||||
.max_no_of_hqd = 24,
|
.max_no_of_hqd = 24,
|
||||||
|
.doorbell_size = 4,
|
||||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||||
.event_interrupt_class = &event_interrupt_class_cik,
|
.event_interrupt_class = &event_interrupt_class_cik,
|
||||||
.num_of_watch_points = 4,
|
.num_of_watch_points = 4,
|
||||||
@ -55,6 +56,7 @@ static const struct kfd_device_info carrizo_device_info = {
|
|||||||
.max_pasid_bits = 16,
|
.max_pasid_bits = 16,
|
||||||
/* max num of queues for CZ.TODO should be a dynamic value */
|
/* max num of queues for CZ.TODO should be a dynamic value */
|
||||||
.max_no_of_hqd = 24,
|
.max_no_of_hqd = 24,
|
||||||
|
.doorbell_size = 4,
|
||||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||||
.event_interrupt_class = &event_interrupt_class_cik,
|
.event_interrupt_class = &event_interrupt_class_cik,
|
||||||
.num_of_watch_points = 4,
|
.num_of_watch_points = 4,
|
||||||
@ -70,6 +72,7 @@ static const struct kfd_device_info hawaii_device_info = {
|
|||||||
.max_pasid_bits = 16,
|
.max_pasid_bits = 16,
|
||||||
/* max num of queues for KV.TODO should be a dynamic value */
|
/* max num of queues for KV.TODO should be a dynamic value */
|
||||||
.max_no_of_hqd = 24,
|
.max_no_of_hqd = 24,
|
||||||
|
.doorbell_size = 4,
|
||||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||||
.event_interrupt_class = &event_interrupt_class_cik,
|
.event_interrupt_class = &event_interrupt_class_cik,
|
||||||
.num_of_watch_points = 4,
|
.num_of_watch_points = 4,
|
||||||
@ -83,6 +86,7 @@ static const struct kfd_device_info tonga_device_info = {
|
|||||||
.asic_family = CHIP_TONGA,
|
.asic_family = CHIP_TONGA,
|
||||||
.max_pasid_bits = 16,
|
.max_pasid_bits = 16,
|
||||||
.max_no_of_hqd = 24,
|
.max_no_of_hqd = 24,
|
||||||
|
.doorbell_size = 4,
|
||||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||||
.event_interrupt_class = &event_interrupt_class_cik,
|
.event_interrupt_class = &event_interrupt_class_cik,
|
||||||
.num_of_watch_points = 4,
|
.num_of_watch_points = 4,
|
||||||
@ -96,6 +100,7 @@ static const struct kfd_device_info tonga_vf_device_info = {
|
|||||||
.asic_family = CHIP_TONGA,
|
.asic_family = CHIP_TONGA,
|
||||||
.max_pasid_bits = 16,
|
.max_pasid_bits = 16,
|
||||||
.max_no_of_hqd = 24,
|
.max_no_of_hqd = 24,
|
||||||
|
.doorbell_size = 4,
|
||||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||||
.event_interrupt_class = &event_interrupt_class_cik,
|
.event_interrupt_class = &event_interrupt_class_cik,
|
||||||
.num_of_watch_points = 4,
|
.num_of_watch_points = 4,
|
||||||
@ -109,6 +114,7 @@ static const struct kfd_device_info fiji_device_info = {
|
|||||||
.asic_family = CHIP_FIJI,
|
.asic_family = CHIP_FIJI,
|
||||||
.max_pasid_bits = 16,
|
.max_pasid_bits = 16,
|
||||||
.max_no_of_hqd = 24,
|
.max_no_of_hqd = 24,
|
||||||
|
.doorbell_size = 4,
|
||||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||||
.event_interrupt_class = &event_interrupt_class_cik,
|
.event_interrupt_class = &event_interrupt_class_cik,
|
||||||
.num_of_watch_points = 4,
|
.num_of_watch_points = 4,
|
||||||
@ -122,6 +128,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
|
|||||||
.asic_family = CHIP_FIJI,
|
.asic_family = CHIP_FIJI,
|
||||||
.max_pasid_bits = 16,
|
.max_pasid_bits = 16,
|
||||||
.max_no_of_hqd = 24,
|
.max_no_of_hqd = 24,
|
||||||
|
.doorbell_size = 4,
|
||||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||||
.event_interrupt_class = &event_interrupt_class_cik,
|
.event_interrupt_class = &event_interrupt_class_cik,
|
||||||
.num_of_watch_points = 4,
|
.num_of_watch_points = 4,
|
||||||
@ -136,6 +143,7 @@ static const struct kfd_device_info polaris10_device_info = {
|
|||||||
.asic_family = CHIP_POLARIS10,
|
.asic_family = CHIP_POLARIS10,
|
||||||
.max_pasid_bits = 16,
|
.max_pasid_bits = 16,
|
||||||
.max_no_of_hqd = 24,
|
.max_no_of_hqd = 24,
|
||||||
|
.doorbell_size = 4,
|
||||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||||
.event_interrupt_class = &event_interrupt_class_cik,
|
.event_interrupt_class = &event_interrupt_class_cik,
|
||||||
.num_of_watch_points = 4,
|
.num_of_watch_points = 4,
|
||||||
@ -149,6 +157,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
|
|||||||
.asic_family = CHIP_POLARIS10,
|
.asic_family = CHIP_POLARIS10,
|
||||||
.max_pasid_bits = 16,
|
.max_pasid_bits = 16,
|
||||||
.max_no_of_hqd = 24,
|
.max_no_of_hqd = 24,
|
||||||
|
.doorbell_size = 4,
|
||||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||||
.event_interrupt_class = &event_interrupt_class_cik,
|
.event_interrupt_class = &event_interrupt_class_cik,
|
||||||
.num_of_watch_points = 4,
|
.num_of_watch_points = 4,
|
||||||
@ -162,6 +171,7 @@ static const struct kfd_device_info polaris11_device_info = {
|
|||||||
.asic_family = CHIP_POLARIS11,
|
.asic_family = CHIP_POLARIS11,
|
||||||
.max_pasid_bits = 16,
|
.max_pasid_bits = 16,
|
||||||
.max_no_of_hqd = 24,
|
.max_no_of_hqd = 24,
|
||||||
|
.doorbell_size = 4,
|
||||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||||
.event_interrupt_class = &event_interrupt_class_cik,
|
.event_interrupt_class = &event_interrupt_class_cik,
|
||||||
.num_of_watch_points = 4,
|
.num_of_watch_points = 4,
|
||||||
|
@ -33,7 +33,6 @@
|
|||||||
|
|
||||||
static DEFINE_IDA(doorbell_ida);
|
static DEFINE_IDA(doorbell_ida);
|
||||||
static unsigned int max_doorbell_slices;
|
static unsigned int max_doorbell_slices;
|
||||||
#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Each device exposes a doorbell aperture, a PCI MMIO aperture that
|
* Each device exposes a doorbell aperture, a PCI MMIO aperture that
|
||||||
@ -50,9 +49,9 @@ static unsigned int max_doorbell_slices;
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* # of doorbell bytes allocated for each process. */
|
/* # of doorbell bytes allocated for each process. */
|
||||||
static inline size_t doorbell_process_allocation(void)
|
static size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
|
||||||
{
|
{
|
||||||
return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES *
|
return roundup(kfd->device_info->doorbell_size *
|
||||||
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
|
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
|
||||||
PAGE_SIZE);
|
PAGE_SIZE);
|
||||||
}
|
}
|
||||||
@ -72,16 +71,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
|
|||||||
|
|
||||||
doorbell_start_offset =
|
doorbell_start_offset =
|
||||||
roundup(kfd->shared_resources.doorbell_start_offset,
|
roundup(kfd->shared_resources.doorbell_start_offset,
|
||||||
doorbell_process_allocation());
|
kfd_doorbell_process_slice(kfd));
|
||||||
|
|
||||||
doorbell_aperture_size =
|
doorbell_aperture_size =
|
||||||
rounddown(kfd->shared_resources.doorbell_aperture_size,
|
rounddown(kfd->shared_resources.doorbell_aperture_size,
|
||||||
doorbell_process_allocation());
|
kfd_doorbell_process_slice(kfd));
|
||||||
|
|
||||||
if (doorbell_aperture_size > doorbell_start_offset)
|
if (doorbell_aperture_size > doorbell_start_offset)
|
||||||
doorbell_process_limit =
|
doorbell_process_limit =
|
||||||
(doorbell_aperture_size - doorbell_start_offset) /
|
(doorbell_aperture_size - doorbell_start_offset) /
|
||||||
doorbell_process_allocation();
|
kfd_doorbell_process_slice(kfd);
|
||||||
else
|
else
|
||||||
return -ENOSPC;
|
return -ENOSPC;
|
||||||
|
|
||||||
@ -95,7 +94,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
|
|||||||
kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
|
kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
|
||||||
|
|
||||||
kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
|
kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
|
||||||
doorbell_process_allocation());
|
kfd_doorbell_process_slice(kfd));
|
||||||
|
|
||||||
if (!kfd->doorbell_kernel_ptr)
|
if (!kfd->doorbell_kernel_ptr)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
@ -132,16 +131,16 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
|
|||||||
phys_addr_t address;
|
phys_addr_t address;
|
||||||
struct kfd_dev *dev;
|
struct kfd_dev *dev;
|
||||||
|
|
||||||
|
/* Find kfd device according to gpu id */
|
||||||
|
dev = kfd_device_by_id(vma->vm_pgoff);
|
||||||
|
if (!dev)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For simplicity we only allow mapping of the entire doorbell
|
* For simplicity we only allow mapping of the entire doorbell
|
||||||
* allocation of a single device & process.
|
* allocation of a single device & process.
|
||||||
*/
|
*/
|
||||||
if (vma->vm_end - vma->vm_start != doorbell_process_allocation())
|
if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
/* Find kfd device according to gpu id */
|
|
||||||
dev = kfd_device_by_id(vma->vm_pgoff);
|
|
||||||
if (!dev)
|
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
/* Calculate physical address of doorbell */
|
/* Calculate physical address of doorbell */
|
||||||
@ -158,19 +157,19 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
|
|||||||
" vm_flags == 0x%04lX\n"
|
" vm_flags == 0x%04lX\n"
|
||||||
" size == 0x%04lX\n",
|
" size == 0x%04lX\n",
|
||||||
(unsigned long long) vma->vm_start, address, vma->vm_flags,
|
(unsigned long long) vma->vm_start, address, vma->vm_flags,
|
||||||
doorbell_process_allocation());
|
kfd_doorbell_process_slice(dev));
|
||||||
|
|
||||||
|
|
||||||
return io_remap_pfn_range(vma,
|
return io_remap_pfn_range(vma,
|
||||||
vma->vm_start,
|
vma->vm_start,
|
||||||
address >> PAGE_SHIFT,
|
address >> PAGE_SHIFT,
|
||||||
doorbell_process_allocation(),
|
kfd_doorbell_process_slice(dev),
|
||||||
vma->vm_page_prot);
|
vma->vm_page_prot);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* get kernel iomem pointer for a doorbell */
|
/* get kernel iomem pointer for a doorbell */
|
||||||
u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
|
void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
|
||||||
unsigned int *doorbell_off)
|
unsigned int *doorbell_off)
|
||||||
{
|
{
|
||||||
u32 inx;
|
u32 inx;
|
||||||
@ -185,6 +184,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
|
|||||||
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
|
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
inx *= kfd->device_info->doorbell_size / sizeof(u32);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Calculating the kernel doorbell offset using the first
|
* Calculating the kernel doorbell offset using the first
|
||||||
* doorbell page.
|
* doorbell page.
|
||||||
@ -210,7 +211,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
|
|||||||
mutex_unlock(&kfd->doorbell_mutex);
|
mutex_unlock(&kfd->doorbell_mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
|
void write_kernel_doorbell(void __iomem *db, u32 value)
|
||||||
{
|
{
|
||||||
if (db) {
|
if (db) {
|
||||||
writel(value, db);
|
writel(value, db);
|
||||||
@ -228,20 +229,21 @@ unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
|
|||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* doorbell_id_offset accounts for doorbells taken by KGD.
|
* doorbell_id_offset accounts for doorbells taken by KGD.
|
||||||
* index * doorbell_process_allocation/sizeof(u32) adjusts to
|
* index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
|
||||||
* the process's doorbells.
|
* the process's doorbells. The offset returned is in dword
|
||||||
|
* units regardless of the ASIC-dependent doorbell size.
|
||||||
*/
|
*/
|
||||||
return kfd->doorbell_id_offset +
|
return kfd->doorbell_id_offset +
|
||||||
process->doorbell_index
|
process->doorbell_index
|
||||||
* doorbell_process_allocation() / sizeof(u32) +
|
* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
|
||||||
queue_id;
|
queue_id * kfd->device_info->doorbell_size / sizeof(u32);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
|
uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
|
||||||
{
|
{
|
||||||
uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
|
uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
|
||||||
kfd->shared_resources.doorbell_start_offset) /
|
kfd->shared_resources.doorbell_start_offset) /
|
||||||
doorbell_process_allocation() + 1;
|
kfd_doorbell_process_slice(kfd) + 1;
|
||||||
|
|
||||||
return num_of_elems;
|
return num_of_elems;
|
||||||
|
|
||||||
@ -251,7 +253,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
|
|||||||
struct kfd_process *process)
|
struct kfd_process *process)
|
||||||
{
|
{
|
||||||
return dev->doorbell_base +
|
return dev->doorbell_base +
|
||||||
process->doorbell_index * doorbell_process_allocation();
|
process->doorbell_index * kfd_doorbell_process_slice(dev);
|
||||||
}
|
}
|
||||||
|
|
||||||
int kfd_alloc_process_doorbells(struct kfd_process *process)
|
int kfd_alloc_process_doorbells(struct kfd_process *process)
|
||||||
|
@ -160,6 +160,7 @@ struct kfd_device_info {
|
|||||||
const struct kfd_event_interrupt_class *event_interrupt_class;
|
const struct kfd_event_interrupt_class *event_interrupt_class;
|
||||||
unsigned int max_pasid_bits;
|
unsigned int max_pasid_bits;
|
||||||
unsigned int max_no_of_hqd;
|
unsigned int max_no_of_hqd;
|
||||||
|
unsigned int doorbell_size;
|
||||||
size_t ih_ring_entry_size;
|
size_t ih_ring_entry_size;
|
||||||
uint8_t num_of_watch_points;
|
uint8_t num_of_watch_points;
|
||||||
uint16_t mqd_size_aligned;
|
uint16_t mqd_size_aligned;
|
||||||
@ -364,7 +365,7 @@ struct queue_properties {
|
|||||||
uint32_t queue_percent;
|
uint32_t queue_percent;
|
||||||
uint32_t *read_ptr;
|
uint32_t *read_ptr;
|
||||||
uint32_t *write_ptr;
|
uint32_t *write_ptr;
|
||||||
uint32_t __iomem *doorbell_ptr;
|
void __iomem *doorbell_ptr;
|
||||||
uint32_t doorbell_off;
|
uint32_t doorbell_off;
|
||||||
bool is_interop;
|
bool is_interop;
|
||||||
bool is_evicted;
|
bool is_evicted;
|
||||||
@ -728,11 +729,11 @@ void kfd_pasid_free(unsigned int pasid);
|
|||||||
int kfd_doorbell_init(struct kfd_dev *kfd);
|
int kfd_doorbell_init(struct kfd_dev *kfd);
|
||||||
void kfd_doorbell_fini(struct kfd_dev *kfd);
|
void kfd_doorbell_fini(struct kfd_dev *kfd);
|
||||||
int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
|
int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
|
||||||
u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
|
void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
|
||||||
unsigned int *doorbell_off);
|
unsigned int *doorbell_off);
|
||||||
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
|
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
|
||||||
u32 read_kernel_doorbell(u32 __iomem *db);
|
u32 read_kernel_doorbell(u32 __iomem *db);
|
||||||
void write_kernel_doorbell(u32 __iomem *db, u32 value);
|
void write_kernel_doorbell(void __iomem *db, u32 value);
|
||||||
unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
|
unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
|
||||||
struct kfd_process *process,
|
struct kfd_process *process,
|
||||||
unsigned int queue_id);
|
unsigned int queue_id);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user