From b8935a7c4b4f2c487c639eb9071b4e8f0cbeac4a Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 20 Sep 2017 18:10:13 -0400 Subject: [PATCH 01/17] drm/amdkfd: Reorganize kfd resume code The idea is to let kfd init and resume function share the same code path as much as possible, rather than to have two copies of almost identical code. That way improves the code readability and maintainability. Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 78 +++++++++++++------------ 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 5df12b287201..e5d1387c34ae 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -92,6 +92,8 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size); static void kfd_gtt_sa_fini(struct kfd_dev *kfd); +static int kfd_resume(struct kfd_dev *kfd); + static const struct kfd_device_info *lookup_device_info(unsigned short did) { size_t i; @@ -169,15 +171,8 @@ static bool device_iommu_pasid_init(struct kfd_dev *kfd) (unsigned int)(1 << kfd->device_info->max_pasid_bits), iommu_info.max_pasids); - err = amd_iommu_init_device(kfd->pdev, pasid_limit); - if (err < 0) { - dev_err(kfd_device, "error initializing iommu device\n"); - return false; - } - if (!kfd_set_pasid_limit(pasid_limit)) { dev_err(kfd_device, "error setting pasid limit\n"); - amd_iommu_free_device(kfd->pdev); return false; } @@ -273,29 +268,22 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_interrupt_error; } - if (!device_iommu_pasid_init(kfd)) { - dev_err(kfd_device, - "Error initializing iommuv2 for device %x:%x\n", - kfd->pdev->vendor, kfd->pdev->device); - goto device_iommu_pasid_error; - } - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, - iommu_pasid_shutdown_callback); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb); - kfd->dqm = device_queue_manager_init(kfd); if (!kfd->dqm) { dev_err(kfd_device, "Error initializing queue manager\n"); goto device_queue_manager_error; } - if (kfd->dqm->ops.start(kfd->dqm)) { + if (!device_iommu_pasid_init(kfd)) { dev_err(kfd_device, - "Error starting queue manager for device %x:%x\n", + "Error initializing iommuv2 for device %x:%x\n", kfd->pdev->vendor, kfd->pdev->device); - goto dqm_start_error; + goto device_iommu_pasid_error; } + if (kfd_resume(kfd)) + goto kfd_resume_error; + kfd->dbgmgr = NULL; kfd->init_complete = true; @@ -307,11 +295,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto out; -dqm_start_error: +kfd_resume_error: +device_iommu_pasid_error: device_queue_manager_uninit(kfd->dqm); device_queue_manager_error: - amd_iommu_free_device(kfd->pdev); -device_iommu_pasid_error: kfd_interrupt_exit(kfd); kfd_interrupt_error: kfd_topology_remove_device(kfd); @@ -331,8 +318,8 @@ out: void kgd2kfd_device_exit(struct kfd_dev *kfd) { if (kfd->init_complete) { + kgd2kfd_suspend(kfd); device_queue_manager_uninit(kfd->dqm); - amd_iommu_free_device(kfd->pdev); kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); kfd_doorbell_fini(kfd); @@ -355,25 +342,40 @@ void kgd2kfd_suspend(struct kfd_dev *kfd) int kgd2kfd_resume(struct kfd_dev *kfd) { - unsigned int pasid_limit; - int err; + if (!kfd->init_complete) + return 0; - pasid_limit = kfd_get_pasid_limit(); + return kfd_resume(kfd); - if (kfd->init_complete) { - err = amd_iommu_init_device(kfd->pdev, pasid_limit); - if (err < 0) { - dev_err(kfd_device, "failed to initialize iommu\n"); - return -ENXIO; - } +} - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, - iommu_pasid_shutdown_callback); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb); - kfd->dqm->ops.start(kfd->dqm); +static int kfd_resume(struct kfd_dev *kfd) +{ + int err = 0; + unsigned int pasid_limit = kfd_get_pasid_limit(); + + err = amd_iommu_init_device(kfd->pdev, pasid_limit); + if (err) + return -ENXIO; + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, + iommu_pasid_shutdown_callback); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, + iommu_invalid_ppr_cb); + + err = kfd->dqm->ops.start(kfd->dqm); + if (err) { + dev_err(kfd_device, + "Error starting queue manager for device %x:%x\n", + kfd->pdev->vendor, kfd->pdev->device); + goto dqm_start_error; } - return 0; + return err; + +dqm_start_error: + amd_iommu_free_device(kfd->pdev); + + return err; } /* This is called directly from KGD at ISR. */ From 733fa1f7428c362b17b3de3a1c691e21fa803239 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 20 Sep 2017 18:10:14 -0400 Subject: [PATCH 02/17] drm/amdkfd: Fix suspend/resume issue on Carrizo v2 When we do suspend/resume through "sudo pm-suspend" while there is HSA activity running, upon resume we will encounter HWS hanging, which is caused by memory read/write failures. The root cause is that when suspend, we neglected to unbind pasid from kfd device. Another major change is that the bind/unbinding is changed to be performed on a per process basis, instead of whether there are queues in dqm. v2: - free IOMMU device if kfd_bind_processes_to_device fails in kfd_resume - add comments to kfd_bind/unbind_processes_to/from_device - minor cleanups Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 23 +++-- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 13 --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 15 ++- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 97 ++++++++++++++++--- 4 files changed, 109 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index e5d1387c34ae..80c90f3619b7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -184,7 +184,7 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); if (dev) - kfd_unbind_process_from_device(dev, pasid); + kfd_process_iommu_unbind_callback(dev, pasid); } /* @@ -332,12 +332,16 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) void kgd2kfd_suspend(struct kfd_dev *kfd) { - if (kfd->init_complete) { - kfd->dqm->ops.stop(kfd->dqm); - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); - amd_iommu_free_device(kfd->pdev); - } + if (!kfd->init_complete) + return; + + kfd->dqm->ops.stop(kfd->dqm); + + kfd_unbind_processes_from_device(kfd); + + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); + amd_iommu_free_device(kfd->pdev); } int kgd2kfd_resume(struct kfd_dev *kfd) @@ -362,6 +366,10 @@ static int kfd_resume(struct kfd_dev *kfd) amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb); + err = kfd_bind_processes_to_device(kfd); + if (err) + goto processes_bind_error; + err = kfd->dqm->ops.start(kfd->dqm); if (err) { dev_err(kfd_device, @@ -373,6 +381,7 @@ static int kfd_resume(struct kfd_dev *kfd) return err; dqm_start_error: +processes_bind_error: amd_iommu_free_device(kfd->pdev); return err; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 53a66e821624..5db82b877deb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -670,7 +670,6 @@ static int initialize_cpsch(struct device_queue_manager *dqm) static int start_cpsch(struct device_queue_manager *dqm) { - struct device_process_node *node; int retval; retval = 0; @@ -697,11 +696,6 @@ static int start_cpsch(struct device_queue_manager *dqm) init_interrupts(dqm); - list_for_each_entry(node, &dqm->queues, list) - if (node->qpd->pqm->process && dqm->dev) - kfd_bind_process_to_device(dqm->dev, - node->qpd->pqm->process); - execute_queues_cpsch(dqm, true); return 0; @@ -714,15 +708,8 @@ fail_packet_manager_init: static int stop_cpsch(struct device_queue_manager *dqm) { - struct device_process_node *node; - struct kfd_process_device *pdd; - destroy_queues_cpsch(dqm, true, true); - list_for_each_entry(node, &dqm->queues, list) { - pdd = qpd_to_pdd(node->qpd); - pdd->bound = false; - } kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); pm_uninit(&dqm->packets); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 634083e340d1..c71160422348 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -432,6 +432,13 @@ struct qcm_process_device { uint32_t sh_hidden_private_base; }; + +enum kfd_pdd_bound { + PDD_UNBOUND = 0, + PDD_BOUND, + PDD_BOUND_SUSPENDED, +}; + /* Data that is per-process-per device. */ struct kfd_process_device { /* @@ -456,7 +463,7 @@ struct kfd_process_device { uint64_t scratch_limit; /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ - bool bound; + enum kfd_pdd_bound bound; /* This flag tells if we should reset all * wavefronts on process termination @@ -547,8 +554,10 @@ struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, - struct kfd_process *p); -void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid); + struct kfd_process *p); +int kfd_bind_processes_to_device(struct kfd_dev *dev); +void kfd_unbind_processes_from_device(struct kfd_dev *dev); +void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid); struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p); struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 9e65ce3c1967..1325f88591ae 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -174,9 +174,10 @@ static void kfd_process_wq_release(struct work_struct *work) if (pdd->reset_wavefronts) dbgdev_wave_reset_wavefronts(pdd->dev, p); - amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); - list_del(&pdd->per_device_list); + if (pdd->bound == PDD_BOUND) + amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); + list_del(&pdd->per_device_list); kfree(pdd); } @@ -351,9 +352,9 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, list_for_each_entry(pdd, &p->per_device_data, per_device_list) if (pdd->dev == dev) - break; + return pdd; - return pdd; + return NULL; } struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, @@ -368,6 +369,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); pdd->qpd.dqm = dev->dqm; pdd->reset_wavefronts = false; + pdd->bound = PDD_UNBOUND; list_add(&pdd->per_device_list, &p->per_device_data); } @@ -393,19 +395,91 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, return ERR_PTR(-ENOMEM); } - if (pdd->bound) + if (pdd->bound == PDD_BOUND) { return pdd; + } else if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) { + pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n"); + return ERR_PTR(-EINVAL); + } err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); if (err < 0) return ERR_PTR(err); - pdd->bound = true; + pdd->bound = PDD_BOUND; return pdd; } -void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) +/* + * Bind processes do the device that have been temporarily unbound + * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device. + */ +int kfd_bind_processes_to_device(struct kfd_dev *dev) +{ + struct kfd_process_device *pdd; + struct kfd_process *p; + unsigned int temp; + int err = 0; + + int idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + mutex_lock(&p->mutex); + pdd = kfd_get_process_device_data(dev, p); + if (pdd->bound != PDD_BOUND_SUSPENDED) { + mutex_unlock(&p->mutex); + continue; + } + + err = amd_iommu_bind_pasid(dev->pdev, p->pasid, + p->lead_thread); + if (err < 0) { + pr_err("unexpected pasid %d binding failure\n", + p->pasid); + mutex_unlock(&p->mutex); + break; + } + + pdd->bound = PDD_BOUND; + mutex_unlock(&p->mutex); + } + + srcu_read_unlock(&kfd_processes_srcu, idx); + + return err; +} + +/* + * Temporarily unbind currently bound processes from the device and + * mark them as PDD_BOUND_SUSPENDED. These processes will be restored + * to PDD_BOUND state in kfd_bind_processes_to_device. + */ +void kfd_unbind_processes_from_device(struct kfd_dev *dev) +{ + struct kfd_process_device *pdd; + struct kfd_process *p; + unsigned int temp, temp_bound, temp_pasid; + + int idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + mutex_lock(&p->mutex); + pdd = kfd_get_process_device_data(dev, p); + temp_bound = pdd->bound; + temp_pasid = p->pasid; + if (pdd->bound == PDD_BOUND) + pdd->bound = PDD_BOUND_SUSPENDED; + mutex_unlock(&p->mutex); + + if (temp_bound == PDD_BOUND) + amd_iommu_unbind_pasid(dev->pdev, temp_pasid); + } + + srcu_read_unlock(&kfd_processes_srcu, idx); +} + +void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) { struct kfd_process *p; struct kfd_process_device *pdd; @@ -438,15 +512,6 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) pdd->reset_wavefronts = false; } - /* - * Just mark pdd as unbound, because we still need it - * to call amd_iommu_unbind_pasid() in when the - * process exits. - * We don't call amd_iommu_unbind_pasid() here - * because the IOMMU called us. - */ - pdd->bound = false; - mutex_unlock(&p->mutex); } From 8c72c3d7dfa86f7e84c5397975eb9c803e4de7b6 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 20 Sep 2017 18:10:15 -0400 Subject: [PATCH 03/17] drm/amdkfd: Rectify the jiffies calculation error with milliseconds v2 The timeout in milliseconds should not be regarded as jiffies. This commit fixed that. v2: - use msecs_to_jiffies - change timeout_ms parameter to unsigned int to match msecs_to_jiffies Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 5db82b877deb..87961fe669e1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -835,12 +835,12 @@ out: int amdkfd_fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, - unsigned long timeout) + unsigned int timeout_ms) { - timeout += jiffies; + unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; while (*fence_addr != fence_value) { - if (time_after(jiffies, timeout)) { + if (time_after(jiffies, end_jiffies)) { pr_err("qcm fence wait loop timeout expired\n"); return -ETIME; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index c71160422348..f37fbfd0e917 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -672,7 +672,7 @@ struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, int amdkfd_fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, - unsigned long timeout); + unsigned int timeout_ms); /* Packet Manager */ From b90e3fbecc9030efb8a6aed1d54a79d0c3d0820a Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 20 Sep 2017 18:10:16 -0400 Subject: [PATCH 04/17] drm/amdkfd: Adjust dequeue latencies and timeouts Adjust latencies and timeouts for dequeueing with HWS and consolidate them in one place. Make them longer to allow long running waves to complete without causing a timeout. The timeout is twice as long as the latency plus some buffer to make sure we don't detect a timeout prematurely. Change timeouts for dequeueing HQDs without HWS. KFD_UNMAP_LATENCY is more consistent with what the HWS does for user queues. Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 4 +++- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 --- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 87961fe669e1..dd60c6eec962 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -323,7 +323,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, - QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, + KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); if (retval) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index faf820a06400..99e23053f59d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -29,7 +29,9 @@ #include "kfd_priv.h" #include "kfd_mqd_manager.h" -#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (500) +#define KFD_UNMAP_LATENCY_MS (4000) +#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (2 * KFD_UNMAP_LATENCY_MS + 1000) + #define CIK_VMID_NUM (8) #define KFD_VMID_START_OFFSET (8) #define VMID_PER_DEVICE CIK_VMID_NUM diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index ed71ad40e8f7..d53d32a91a16 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -185,7 +185,7 @@ static void uninitialize(struct kernel_queue *kq) kq->mqd->destroy_mqd(kq->mqd, kq->queue->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, - QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, + KFD_UNMAP_LATENCY_MS, kq->queue->pipe, kq->queue->queue); else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 1d312603de9f..9eda884d8d48 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -376,7 +376,7 @@ int pm_send_set_resources(struct packet_manager *pm, packet->bitfields2.queue_type = queue_type__mes_set_resources__hsa_interface_queue_hiq; packet->bitfields2.vmid_mask = res->vmid_mask; - packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY; + packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; packet->bitfields7.oac_mask = res->oac_mask; packet->bitfields8.gds_heap_base = res->gds_heap_base; packet->bitfields8.gds_heap_size = res->gds_heap_size; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f37fbfd0e917..8883416634d3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -676,11 +676,8 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, /* Packet Manager */ -#define KFD_HIQ_TIMEOUT (500) - #define KFD_FENCE_COMPLETED (100) #define KFD_FENCE_INIT (10) -#define KFD_UNMAP_LATENCY (150) struct packet_manager { struct device_queue_manager *dqm; From 44008d7a871ce5a487cbcc4c412a5149145ea442 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 20 Sep 2017 18:10:18 -0400 Subject: [PATCH 05/17] drm/amdkfd: Use VMID bitmap from KGD v2 The hard-coded values related to VMID were removed in KFD, as those values can be calculated in the KFD initialization function. v2: remove unnecessary local variable Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 9 ++------- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 5 +++++ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 13 ++++++------- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 4 ---- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 7 +++++++ .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +- 6 files changed, 21 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index 0aa021aa0aa1..7d5635fdd6e7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -769,13 +769,8 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) union GRBM_GFX_INDEX_BITS reg_gfx_index; struct kfd_process_device *pdd; struct dbg_wave_control_info wac_info; - int temp; - int first_vmid_to_scan = 8; - int last_vmid_to_scan = 15; - - first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1; - temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan; - last_vmid_to_scan = first_vmid_to_scan + ffz(temp); + int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; + int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; reg_sq_cmd.u32All = 0; status = 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 80c90f3619b7..46049f005b02 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -219,6 +219,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->shared_resources = *gpu_resources; + kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; + kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; + kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd + - kfd->vm_info.first_vmid_kfd + 1; + /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * kfd->device_info->mqd_size_aligned; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index dd60c6eec962..87f8742dcd8c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -113,11 +113,11 @@ static int allocate_vmid(struct device_queue_manager *dqm, if (dqm->vmid_bitmap == 0) return -ENOMEM; - bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM); + bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, + dqm->dev->vm_info.vmid_num_kfd); clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap); - /* Kaveri kfd vmid's starts from vmid 8 */ - allocated_vmid = bit + KFD_VMID_START_OFFSET; + allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd; pr_debug("vmid allocation %d\n", allocated_vmid); qpd->vmid = allocated_vmid; q->properties.vmid = allocated_vmid; @@ -132,7 +132,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) { - int bit = qpd->vmid - KFD_VMID_START_OFFSET; + int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; /* Release the vmid mapping */ set_pasid_vmid_mapping(dqm, 0, qpd->vmid); @@ -507,7 +507,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) dqm->allocated_queues[pipe] |= 1 << queue; } - dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1; + dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; return 0; @@ -613,8 +613,7 @@ static int set_sched_resources(struct device_queue_manager *dqm) int i, mec; struct scheduling_resources res; - res.vmid_mask = (1 << VMID_PER_DEVICE) - 1; - res.vmid_mask <<= KFD_VMID_START_OFFSET; + res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap; res.queue_mask = 0; for (i = 0; i < KGD_MAX_QUEUES; ++i) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 99e23053f59d..60d46ce0b8a7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -32,10 +32,6 @@ #define KFD_UNMAP_LATENCY_MS (4000) #define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (2 * KFD_UNMAP_LATENCY_MS + 1000) -#define CIK_VMID_NUM (8) -#define KFD_VMID_START_OFFSET (8) -#define VMID_PER_DEVICE CIK_VMID_NUM -#define KFD_DQM_FIRST_PIPE (0) #define CIK_SDMA_QUEUES (4) #define CIK_SDMA_QUEUES_PER_ENGINE (2) #define CIK_SDMA_ENGINE_NUM (2) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 8883416634d3..f0ca60f27d9a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -141,6 +141,12 @@ struct kfd_mem_obj { uint32_t *cpu_ptr; }; +struct kfd_vmid_info { + uint32_t first_vmid_kfd; + uint32_t last_vmid_kfd; + uint32_t vmid_num_kfd; +}; + struct kfd_dev { struct kgd_dev *kgd; @@ -162,6 +168,7 @@ struct kfd_dev { */ struct kgd2kfd_shared_resources shared_resources; + struct kfd_vmid_info vm_info; const struct kfd2kgd_calls *kfd2kgd; struct mutex doorbell_mutex; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 03bec765b03d..68fe0d99f6c2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -187,7 +187,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && - ((dev->dqm->processes_count >= VMID_PER_DEVICE) || + ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); retval = -EPERM; From e596b90338126d08a8d90f08e79bb644b8c9f2b6 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 20 Sep 2017 18:10:19 -0400 Subject: [PATCH 06/17] drm/amdkfd: Reuse CHIP_* from amdgpu v2 There are already CHIP_* definitions under amd_shared.h file on amdgpu side, so KFD should reuse them rather than defining new ones. Using enum for asic type requires default cases on switch statements to prevent compiler warnings. WARN on unsupported ASICs. It should never get there because KFD should not be initialized on unsupported devices. v2: Replace BUG() with WARN and error return Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 4 ++++ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 18 ++++++++++++------ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 3 +++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 +++------ 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 87f8742dcd8c..fe0f0de0ef86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1130,6 +1130,10 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_KAVERI: device_queue_manager_init_cik(&dqm->ops_asic_specific); break; + default: + WARN(1, "Unexpected ASIC family %u", + dev->device_info->asic_family); + goto out_free; } if (!dqm->ops.initialize(dqm)) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index d53d32a91a16..8b0c0645d7c0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -303,14 +303,20 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, case CHIP_KAVERI: kernel_queue_init_cik(&kq->ops_asic_specific); break; + default: + WARN(1, "Unexpected ASIC family %u", + dev->device_info->asic_family); + goto out_free; } - if (!kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) { - pr_err("Failed to init kernel queue\n"); - kfree(kq); - return NULL; - } - return kq; + if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) + return kq; + + pr_err("Failed to init kernel queue\n"); + +out_free: + kfree(kq); + return NULL; } void kernel_queue_uninit(struct kernel_queue *kq) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index b1ef1368c3bb..dfd260ef81ff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -31,6 +31,9 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, return mqd_manager_init_cik(type, dev); case CHIP_CARRIZO: return mqd_manager_init_vi(type, dev); + default: + WARN(1, "Unexpected ASIC family %u", + dev->device_info->asic_family); } return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f0ca60f27d9a..5ebe565fecb3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -33,6 +33,8 @@ #include #include +#include "amd_shared.h" + #define KFD_SYSFS_FILE_MODE 0444 #define KFD_MMAP_DOORBELL_MASK 0x8000000000000 @@ -112,11 +114,6 @@ enum cache_policy { cache_policy_noncoherent }; -enum asic_family_type { - CHIP_KAVERI = 0, - CHIP_CARRIZO -}; - struct kfd_event_interrupt_class { bool (*interrupt_isr)(struct kfd_dev *dev, const uint32_t *ih_ring_entry); @@ -125,7 +122,7 @@ struct kfd_event_interrupt_class { }; struct kfd_device_info { - unsigned int asic_family; + enum amd_asic_type asic_family; const struct kfd_event_interrupt_class *event_interrupt_class; unsigned int max_pasid_bits; unsigned int max_no_of_hqd; From 58dcd5bfcf99c9222a825ca67a0ecbd75516ec3f Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 20 Sep 2017 18:10:20 -0400 Subject: [PATCH 07/17] drm/amdkfd: Drop _nocpsch suffix from shared functions Several functions in DQM are shared between cpsch and nocpsch code. Remove the misleading _nocpsch suffix from their names. Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index fe0f0de0ef86..471b34e58225 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -386,7 +386,7 @@ out_unlock: return retval; } -static struct mqd_manager *get_mqd_manager_nocpsch( +static struct mqd_manager *get_mqd_manager( struct device_queue_manager *dqm, enum KFD_MQD_TYPE type) { struct mqd_manager *mqd; @@ -407,7 +407,7 @@ static struct mqd_manager *get_mqd_manager_nocpsch( return mqd; } -static int register_process_nocpsch(struct device_queue_manager *dqm, +static int register_process(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct device_process_node *n; @@ -431,7 +431,7 @@ static int register_process_nocpsch(struct device_queue_manager *dqm, return retval; } -static int unregister_process_nocpsch(struct device_queue_manager *dqm, +static int unregister_process(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { int retval; @@ -513,7 +513,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) return 0; } -static void uninitialize_nocpsch(struct device_queue_manager *dqm) +static void uninitialize(struct device_queue_manager *dqm) { int i; @@ -1095,10 +1095,10 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.stop = stop_cpsch; dqm->ops.destroy_queue = destroy_queue_cpsch; dqm->ops.update_queue = update_queue; - dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch; - dqm->ops.register_process = register_process_nocpsch; - dqm->ops.unregister_process = unregister_process_nocpsch; - dqm->ops.uninitialize = uninitialize_nocpsch; + dqm->ops.get_mqd_manager = get_mqd_manager; + dqm->ops.register_process = register_process; + dqm->ops.unregister_process = unregister_process; + dqm->ops.uninitialize = uninitialize; dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; @@ -1110,11 +1110,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.create_queue = create_queue_nocpsch; dqm->ops.destroy_queue = destroy_queue_nocpsch; dqm->ops.update_queue = update_queue; - dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch; - dqm->ops.register_process = register_process_nocpsch; - dqm->ops.unregister_process = unregister_process_nocpsch; + dqm->ops.get_mqd_manager = get_mqd_manager; + dqm->ops.register_process = register_process; + dqm->ops.unregister_process = unregister_process; dqm->ops.initialize = initialize_nocpsch; - dqm->ops.uninitialize = uninitialize_nocpsch; + dqm->ops.uninitialize = uninitialize; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; break; default: From 7da2bcf87617fb00386ce61024e1c84d045b4e4f Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 27 Sep 2017 00:09:48 -0400 Subject: [PATCH 08/17] drm/amdkfd: Avoid name confusion involved in queue unmapping When unmapping the queues from HW scheduler, there are two actions: reset and preempt. So naming the variables with only preempt is inapproriate. For functions such as destroy_queues_cpsch, what they do actually is to unmap the queues on HW scheduler rather than to destroy them. Change the name to reflect that fact. On the other hand, there is already a function called destroy_queue_cpsch() which exactly destroys a queue, and the name is very close to destroy_queues_cpsch(), resulting in confusion. Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 +++++++++---------- .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 18 +++++------ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 20 ++++++------ 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 471b34e58225..1995e0aa7fa3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -45,8 +45,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd); static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock); -static int destroy_queues_cpsch(struct device_queue_manager *dqm, - bool preempt_static_queues, bool lock); +static int unmap_queues_cpsch(struct device_queue_manager *dqm, + bool static_queues_included, bool lock); static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, @@ -707,7 +707,7 @@ fail_packet_manager_init: static int stop_cpsch(struct device_queue_manager *dqm) { - destroy_queues_cpsch(dqm, true, true); + unmap_queues_cpsch(dqm, true, true); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); pm_uninit(&dqm->packets); @@ -750,7 +750,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, { mutex_lock(&dqm->lock); /* here we actually preempt the DIQ */ - destroy_queues_cpsch(dqm, true, false); + unmap_queues_cpsch(dqm, true, false); list_del(&kq->list); dqm->queue_count--; qpd->is_debug = false; @@ -849,19 +849,19 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, return 0; } -static int destroy_sdma_queues(struct device_queue_manager *dqm, +static int unmap_sdma_queues(struct device_queue_manager *dqm, unsigned int sdma_engine) { return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, - KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, sdma_engine); } -static int destroy_queues_cpsch(struct device_queue_manager *dqm, - bool preempt_static_queues, bool lock) +static int unmap_queues_cpsch(struct device_queue_manager *dqm, + bool static_queues_included, bool lock) { int retval; - enum kfd_preempt_type_filter preempt_type; + enum kfd_unmap_queues_filter filter; struct kfd_process_device *pdd; retval = 0; @@ -875,16 +875,16 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, dqm->sdma_queue_count); if (dqm->sdma_queue_count > 0) { - destroy_sdma_queues(dqm, 0); - destroy_sdma_queues(dqm, 1); + unmap_sdma_queues(dqm, 0); + unmap_sdma_queues(dqm, 1); } - preempt_type = preempt_static_queues ? - KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES : - KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES; + filter = static_queues_included ? + KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, - preempt_type, 0, false, 0); + filter, 0, false, 0); if (retval) goto out; @@ -916,7 +916,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock) if (lock) mutex_lock(&dqm->lock); - retval = destroy_queues_cpsch(dqm, false, false); + retval = unmap_queues_cpsch(dqm, false, false); if (retval) { pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption"); goto out; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 9eda884d8d48..e5a15babb4f3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -476,7 +476,7 @@ fail_acquire_packet_buffer: } int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, - enum kfd_preempt_type_filter mode, + enum kfd_unmap_queues_filter filter, uint32_t filter_param, bool reset, unsigned int sdma_engine) { @@ -494,8 +494,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, packet = (struct pm4_mes_unmap_queues *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); - pr_debug("static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n", - mode, reset, type); + pr_debug("static_queue: unmapping queues: filter is %d , reset is %d , type is %d\n", + filter, reset, type); packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES, sizeof(struct pm4_mes_unmap_queues)); switch (type) { @@ -521,29 +521,29 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, packet->bitfields2.action = action__mes_unmap_queues__preempt_queues; - switch (mode) { - case KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE: + switch (filter) { + case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: packet->bitfields2.queue_sel = queue_sel__mes_unmap_queues__perform_request_on_specified_queues; packet->bitfields2.num_queues = 1; packet->bitfields3b.doorbell_offset0 = filter_param; break; - case KFD_PREEMPT_TYPE_FILTER_BY_PASID: + case KFD_UNMAP_QUEUES_FILTER_BY_PASID: packet->bitfields2.queue_sel = queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; packet->bitfields3a.pasid = filter_param; break; - case KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES: + case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: packet->bitfields2.queue_sel = queue_sel__mes_unmap_queues__unmap_all_queues; break; - case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES: + case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: /* in this case, we do not preempt static queues */ packet->bitfields2.queue_sel = queue_sel__mes_unmap_queues__unmap_all_non_static_queues; break; default: - WARN(1, "filter %d", mode); + WARN(1, "filter %d", filter); retval = -EINVAL; goto err_invalid; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 5ebe565fecb3..8af0d6fe257a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -222,22 +222,22 @@ void kfd_chardev_exit(void); struct device *kfd_chardev(void); /** - * enum kfd_preempt_type_filter + * enum kfd_unmap_queues_filter * - * @KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE: Preempts single queue. + * @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts single queue. * - * @KFD_PRERMPT_TYPE_FILTER_ALL_QUEUES: Preempts all queues in the + * @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the * running queues list. * - * @KFD_PRERMPT_TYPE_FILTER_BY_PASID: Preempts queues that belongs to + * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belongs to * specific process. * */ -enum kfd_preempt_type_filter { - KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE, - KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, - KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, - KFD_PREEMPT_TYPE_FILTER_BY_PASID +enum kfd_unmap_queues_filter { + KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE, + KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, + KFD_UNMAP_QUEUES_FILTER_BY_PASID }; /** @@ -700,7 +700,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, uint32_t fence_value); int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, - enum kfd_preempt_type_filter mode, + enum kfd_unmap_queues_filter mode, uint32_t filter_param, bool reset, unsigned int sdma_engine); From ac30c78384885b209324dacc7b65bd8e9cc69fbf Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Sun, 8 Oct 2017 14:57:18 +0300 Subject: [PATCH 09/17] drm/amdkfd: move locking outside of unmap_queues_cpsch Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 65 ++++++++----------- 1 file changed, 28 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 1995e0aa7fa3..0f9c39ba548a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -44,9 +44,9 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); -static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock); +static int execute_queues_cpsch(struct device_queue_manager *dqm); static int unmap_queues_cpsch(struct device_queue_manager *dqm, - bool static_queues_included, bool lock); + bool static_queues_included); static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, @@ -379,7 +379,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) dqm->queue_count--; if (sched_policy != KFD_SCHED_POLICY_NO_HWS) - retval = execute_queues_cpsch(dqm, false); + retval = execute_queues_cpsch(dqm); out_unlock: mutex_unlock(&dqm->lock); @@ -695,7 +695,9 @@ static int start_cpsch(struct device_queue_manager *dqm) init_interrupts(dqm); - execute_queues_cpsch(dqm, true); + mutex_lock(&dqm->lock); + execute_queues_cpsch(dqm); + mutex_unlock(&dqm->lock); return 0; fail_allocate_vidmem: @@ -707,7 +709,9 @@ fail_packet_manager_init: static int stop_cpsch(struct device_queue_manager *dqm) { - unmap_queues_cpsch(dqm, true, true); + mutex_lock(&dqm->lock); + unmap_queues_cpsch(dqm, true); + mutex_unlock(&dqm->lock); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); pm_uninit(&dqm->packets); @@ -738,7 +742,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, list_add(&kq->list, &qpd->priv_queue_list); dqm->queue_count++; qpd->is_debug = true; - execute_queues_cpsch(dqm, false); + execute_queues_cpsch(dqm); mutex_unlock(&dqm->lock); return 0; @@ -750,11 +754,11 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, { mutex_lock(&dqm->lock); /* here we actually preempt the DIQ */ - unmap_queues_cpsch(dqm, true, false); + unmap_queues_cpsch(dqm, true); list_del(&kq->list); dqm->queue_count--; qpd->is_debug = false; - execute_queues_cpsch(dqm, false); + execute_queues_cpsch(dqm); /* * Unconditionally decrement this counter, regardless of the queue's * type. @@ -813,7 +817,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, list_add(&q->list, &qpd->queues_list); if (q->properties.is_active) { dqm->queue_count++; - retval = execute_queues_cpsch(dqm, false); + retval = execute_queues_cpsch(dqm); } if (q->properties.type == KFD_QUEUE_TYPE_SDMA) @@ -857,8 +861,9 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm, sdma_engine); } +/* dqm->lock mutex has to be locked before calling this function */ static int unmap_queues_cpsch(struct device_queue_manager *dqm, - bool static_queues_included, bool lock) + bool static_queues_included) { int retval; enum kfd_unmap_queues_filter filter; @@ -866,10 +871,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, retval = 0; - if (lock) - mutex_lock(&dqm->lock); if (!dqm->active_runlist) - goto out; + return retval; pr_debug("Before destroying queues, sdma queue count is : %u\n", dqm->sdma_queue_count); @@ -886,7 +889,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, filter, 0, false, 0); if (retval) - goto out; + return retval; *dqm->fence_addr = KFD_FENCE_INIT; pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr, @@ -898,50 +901,38 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, pdd = kfd_get_process_device_data(dqm->dev, kfd_get_process(current)); pdd->reset_wavefronts = true; - goto out; + return retval; } pm_release_ib(&dqm->packets); dqm->active_runlist = false; -out: - if (lock) - mutex_unlock(&dqm->lock); return retval; } -static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock) +/* dqm->lock mutex has to be locked before calling this function */ +static int execute_queues_cpsch(struct device_queue_manager *dqm) { int retval; - if (lock) - mutex_lock(&dqm->lock); - - retval = unmap_queues_cpsch(dqm, false, false); + retval = unmap_queues_cpsch(dqm, false); if (retval) { pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption"); - goto out; + return retval; } - if (dqm->queue_count <= 0 || dqm->processes_count <= 0) { - retval = 0; - goto out; - } + if (dqm->queue_count <= 0 || dqm->processes_count <= 0) + return 0; - if (dqm->active_runlist) { - retval = 0; - goto out; - } + if (dqm->active_runlist) + return 0; retval = pm_send_runlist(&dqm->packets, &dqm->queues); if (retval) { pr_err("failed to execute runlist"); - goto out; + return retval; } dqm->active_runlist = true; -out: - if (lock) - mutex_unlock(&dqm->lock); return retval; } @@ -984,7 +975,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, if (q->properties.is_active) dqm->queue_count--; - execute_queues_cpsch(dqm, false); + execute_queues_cpsch(dqm); mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); From 4465f466c76774d3b5866929524cce6dd2d4dfb1 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Sun, 8 Oct 2017 14:57:52 +0300 Subject: [PATCH 10/17] drm/amdkfd: Pass filter params to unmap_queues_cpsch Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 0f9c39ba548a..be925a49dd10 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -46,7 +46,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, static int execute_queues_cpsch(struct device_queue_manager *dqm); static int unmap_queues_cpsch(struct device_queue_manager *dqm, - bool static_queues_included); + enum kfd_unmap_queues_filter filter, + uint32_t filter_param); static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, @@ -710,7 +711,7 @@ fail_packet_manager_init: static int stop_cpsch(struct device_queue_manager *dqm) { mutex_lock(&dqm->lock); - unmap_queues_cpsch(dqm, true); + unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); mutex_unlock(&dqm->lock); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); @@ -754,7 +755,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, { mutex_lock(&dqm->lock); /* here we actually preempt the DIQ */ - unmap_queues_cpsch(dqm, true); + unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); list_del(&kq->list); dqm->queue_count--; qpd->is_debug = false; @@ -863,10 +864,10 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm, /* dqm->lock mutex has to be locked before calling this function */ static int unmap_queues_cpsch(struct device_queue_manager *dqm, - bool static_queues_included) + enum kfd_unmap_queues_filter filter, + uint32_t filter_param) { int retval; - enum kfd_unmap_queues_filter filter; struct kfd_process_device *pdd; retval = 0; @@ -882,12 +883,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, unmap_sdma_queues(dqm, 1); } - filter = static_queues_included ? - KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; - retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, - filter, 0, false, 0); + filter, filter_param, false, 0); if (retval) return retval; @@ -914,7 +911,8 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm) { int retval; - retval = unmap_queues_cpsch(dqm, false); + retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, + 0); if (retval) { pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption"); return retval; From 60a00956577cdc70c85260df89f336f15a2f054c Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 27 Sep 2017 00:09:50 -0400 Subject: [PATCH 11/17] drm/amdkfd: Fix MQD updates When a queue is mapped, the MQD is owned by the FW. The FW overwrites the MQD on the next unmap operation. Therefore the queue must be unmapped before updating the MQD. For the non-HWS case, also fix disabling of queues and creation of queues in disabled state. Signed-off-by: Oak Zeng Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 84 ++++++++++++++----- 1 file changed, 62 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index be925a49dd10..dccb4932bcac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -49,6 +49,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, uint32_t filter_param); +static int map_queues_cpsch(struct device_queue_manager *dqm); + static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); @@ -274,6 +276,9 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, dqm->dev->kfd2kgd->set_scratch_backing_va( dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid); + if (!q->properties.is_active) + return 0; + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, q->process->mm); if (retval) @@ -365,23 +370,50 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) goto out_unlock; } - if (q->properties.is_active) - prev_active = true; + /* Save previous activity state for counters */ + prev_active = q->properties.is_active; + + /* Make sure the queue is unmapped before updating the MQD */ + if (sched_policy != KFD_SCHED_POLICY_NO_HWS) { + retval = unmap_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + if (retval != 0) { + pr_err("unmap queue failed\n"); + goto out_unlock; + } + } else if (sched_policy == KFD_SCHED_POLICY_NO_HWS && + prev_active && + (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || + q->properties.type == KFD_QUEUE_TYPE_SDMA)) { + retval = mqd->destroy_mqd(mqd, q->mqd, + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, + KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); + if (retval) { + pr_err("destroy mqd failed\n"); + goto out_unlock; + } + } + + retval = mqd->update_mqd(mqd, q->mqd, &q->properties); + + if (sched_policy != KFD_SCHED_POLICY_NO_HWS) + retval = map_queues_cpsch(dqm); + else if (sched_policy == KFD_SCHED_POLICY_NO_HWS && + q->properties.is_active && + (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || + q->properties.type == KFD_QUEUE_TYPE_SDMA)) + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, + &q->properties, q->process->mm); /* - * * check active state vs. the previous state * and modify counter accordingly */ - retval = mqd->update_mqd(mqd, q->mqd, &q->properties); - if ((q->properties.is_active) && (!prev_active)) + if (q->properties.is_active && !prev_active) dqm->queue_count++; else if (!q->properties.is_active && prev_active) dqm->queue_count--; - if (sched_policy != KFD_SCHED_POLICY_NO_HWS) - retval = execute_queues_cpsch(dqm); - out_unlock: mutex_unlock(&dqm->lock); return retval; @@ -862,6 +894,27 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm, sdma_engine); } +/* dqm->lock mutex has to be locked before calling this function */ +static int map_queues_cpsch(struct device_queue_manager *dqm) +{ + int retval; + + if (dqm->queue_count <= 0 || dqm->processes_count <= 0) + return 0; + + if (dqm->active_runlist) + return 0; + + retval = pm_send_runlist(&dqm->packets, &dqm->queues); + if (retval) { + pr_err("failed to execute runlist\n"); + return retval; + } + dqm->active_runlist = true; + + return retval; +} + /* dqm->lock mutex has to be locked before calling this function */ static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, @@ -918,20 +971,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm) return retval; } - if (dqm->queue_count <= 0 || dqm->processes_count <= 0) - return 0; - - if (dqm->active_runlist) - return 0; - - retval = pm_send_runlist(&dqm->packets, &dqm->queues); - if (retval) { - pr_err("failed to execute runlist"); - return retval; - } - dqm->active_runlist = true; - - return retval; + return map_queues_cpsch(dqm); } static int destroy_queue_cpsch(struct device_queue_manager *dqm, From c4744e243c346b9e96857c6d11c716d31d475165 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 27 Sep 2017 00:09:51 -0400 Subject: [PATCH 12/17] drm/amdkfd: Avoid submitting an unnecessary packet to HWS v2: Make queue mapping interfaces more consistent by passing unmap filter parameters directly to execute_queues_cpsch, same as unmap_queues_cpsch. Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index dccb4932bcac..f5a5988257c8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -44,7 +44,9 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); -static int execute_queues_cpsch(struct device_queue_manager *dqm); +static int execute_queues_cpsch(struct device_queue_manager *dqm, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param); static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, uint32_t filter_param); @@ -729,7 +731,7 @@ static int start_cpsch(struct device_queue_manager *dqm) init_interrupts(dqm); mutex_lock(&dqm->lock); - execute_queues_cpsch(dqm); + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); mutex_unlock(&dqm->lock); return 0; @@ -775,7 +777,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, list_add(&kq->list, &qpd->priv_queue_list); dqm->queue_count++; qpd->is_debug = true; - execute_queues_cpsch(dqm); + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); mutex_unlock(&dqm->lock); return 0; @@ -786,12 +788,10 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { mutex_lock(&dqm->lock); - /* here we actually preempt the DIQ */ - unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); list_del(&kq->list); dqm->queue_count--; qpd->is_debug = false; - execute_queues_cpsch(dqm); + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); /* * Unconditionally decrement this counter, regardless of the queue's * type. @@ -850,7 +850,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, list_add(&q->list, &qpd->queues_list); if (q->properties.is_active) { dqm->queue_count++; - retval = execute_queues_cpsch(dqm); + retval = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); } if (q->properties.type == KFD_QUEUE_TYPE_SDMA) @@ -960,14 +961,15 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, } /* dqm->lock mutex has to be locked before calling this function */ -static int execute_queues_cpsch(struct device_queue_manager *dqm) +static int execute_queues_cpsch(struct device_queue_manager *dqm, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param) { int retval; - retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, - 0); + retval = unmap_queues_cpsch(dqm, filter, filter_param); if (retval) { - pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption"); + pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); return retval; } @@ -1013,7 +1015,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, if (q->properties.is_active) dqm->queue_count--; - execute_queues_cpsch(dqm); + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); From 9fd3f1bfae6c6c75f0c8aedb5d499d74cdb52eb9 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 27 Sep 2017 00:09:52 -0400 Subject: [PATCH 13/17] drm/amdkfd: Improve process termination handling Separate device queue termination from process queue manager termination. Unmap all queues at once instead of one at a time. Unmap device queues before the PASID is unbound, in the kfd_process_iommu_unbind_callback. When resetting wavefronts in non-HWS mode, do it before the VMID is released. Signed-off-by: Ben Goz Signed-off-by: shaoyun liu Signed-off-by: Amber Lin Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 185 ++++++++++++++---- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 5 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 18 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 36 +--- .../amd/amdkfd/kfd_process_queue_manager.c | 37 ++-- 5 files changed, 200 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f5a5988257c8..dd7e44593cfe 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -296,65 +296,73 @@ out_deallocate_hqd: return retval; } -static int destroy_queue_nocpsch(struct device_queue_manager *dqm, +/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked + * to avoid asynchronized access + */ +static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) { int retval; struct mqd_manager *mqd; - retval = 0; - - mutex_lock(&dqm->lock); + mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + if (!mqd) + return -ENOMEM; if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { - mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (mqd == NULL) { - retval = -ENOMEM; - goto out; - } deallocate_hqd(dqm, q); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); - if (mqd == NULL) { - retval = -ENOMEM; - goto out; - } dqm->sdma_queue_count--; deallocate_sdma_queue(dqm, q->sdma_id); } else { pr_debug("q->properties.type %d is invalid\n", q->properties.type); - retval = -EINVAL; - goto out; + return -EINVAL; } + dqm->total_queue_count--; retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); - - if (retval) - goto out; + if (retval == -ETIME) + qpd->reset_wavefronts = true; mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); list_del(&q->list); - if (list_empty(&qpd->queues_list)) + if (list_empty(&qpd->queues_list)) { + if (qpd->reset_wavefronts) { + pr_warn("Resetting wave fronts (nocpsch) on dev %p\n", + dqm->dev); + /* dbgdev_wave_reset_wavefronts has to be called before + * deallocate_vmid(), i.e. when vmid is still in use. + */ + dbgdev_wave_reset_wavefronts(dqm->dev, + qpd->pqm->process); + qpd->reset_wavefronts = false; + } + deallocate_vmid(dqm, qpd, q); + } if (q->properties.is_active) dqm->queue_count--; - /* - * Unconditionally decrement this counter, regardless of the queue's - * type - */ - dqm->total_queue_count--; - pr_debug("Total of %d queues are accountable so far\n", - dqm->total_queue_count); + return retval; +} -out: +static int destroy_queue_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + struct queue *q) +{ + int retval; + + mutex_lock(&dqm->lock); + retval = destroy_queue_nocpsch_locked(dqm, qpd, q); mutex_unlock(&dqm->lock); + return retval; } @@ -921,10 +929,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, uint32_t filter_param) { - int retval; - struct kfd_process_device *pdd; - - retval = 0; + int retval = 0; if (!dqm->active_runlist) return retval; @@ -948,12 +953,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, /* should be timed out */ retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS); - if (retval) { - pdd = kfd_get_process_device_data(dqm->dev, - kfd_get_process(current)); - pdd->reset_wavefronts = true; + if (retval) return retval; - } + pm_release_ib(&dqm->packets); dqm->active_runlist = false; @@ -1015,7 +1017,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, if (q->properties.is_active) dqm->queue_count--; - execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + retval = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + if (retval == -ETIME) + qpd->reset_wavefronts = true; mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); @@ -1105,6 +1110,108 @@ out: return retval; } +static int process_termination_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct queue *q, *next; + struct device_process_node *cur, *next_dpn; + int retval = 0; + + mutex_lock(&dqm->lock); + + /* Clear all user mode queues */ + list_for_each_entry_safe(q, next, &qpd->queues_list, list) { + int ret; + + ret = destroy_queue_nocpsch_locked(dqm, qpd, q); + if (ret) + retval = ret; + } + + /* Unregister process */ + list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { + if (qpd == cur->qpd) { + list_del(&cur->list); + kfree(cur); + dqm->processes_count--; + break; + } + } + + mutex_unlock(&dqm->lock); + return retval; +} + + +static int process_termination_cpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + int retval; + struct queue *q, *next; + struct kernel_queue *kq, *kq_next; + struct mqd_manager *mqd; + struct device_process_node *cur, *next_dpn; + enum kfd_unmap_queues_filter filter = + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; + + retval = 0; + + mutex_lock(&dqm->lock); + + /* Clean all kernel queues */ + list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { + list_del(&kq->list); + dqm->queue_count--; + qpd->is_debug = false; + dqm->total_queue_count--; + filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; + } + + /* Clear all user mode queues */ + list_for_each_entry(q, &qpd->queues_list, list) { + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count--; + + if (q->properties.is_active) + dqm->queue_count--; + + dqm->total_queue_count--; + } + + /* Unregister process */ + list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { + if (qpd == cur->qpd) { + list_del(&cur->list); + kfree(cur); + dqm->processes_count--; + break; + } + } + + retval = execute_queues_cpsch(dqm, filter, 0); + if (retval || qpd->reset_wavefronts) { + pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); + dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); + qpd->reset_wavefronts = false; + } + + /* lastly, free mqd resources */ + list_for_each_entry_safe(q, next, &qpd->queues_list, list) { + mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + if (!mqd) { + retval = -ENOMEM; + goto out; + } + list_del(&q->list); + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + } + +out: + mutex_unlock(&dqm->lock); + return retval; +} + struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) { struct device_queue_manager *dqm; @@ -1133,6 +1240,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.process_termination = process_termination_cpsch; break; case KFD_SCHED_POLICY_NO_HWS: /* initialize dqm for no cp scheduling */ @@ -1147,6 +1255,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.initialize = initialize_nocpsch; dqm->ops.uninitialize = uninitialize; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.process_termination = process_termination_nocpsch; break; default: pr_err("Invalid scheduling policy %d\n", sched_policy); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 60d46ce0b8a7..31c2b1f9d320 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -77,6 +77,8 @@ struct device_process_node { * @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the * memory apertures. * + * @process_termination: Clears all process queues belongs to that device. + * */ struct device_queue_manager_ops { @@ -120,6 +122,9 @@ struct device_queue_manager_ops { enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); + + int (*process_termination)(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); }; struct device_queue_manager_asic_ops { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 8af0d6fe257a..2e13f5d46d25 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -421,6 +421,12 @@ struct qcm_process_device { unsigned int queue_count; unsigned int vmid; bool is_debug; + + /* This flag tells if we should reset all wavefronts on + * process termination + */ + bool reset_wavefronts; + /* * All the memory management data should be here too */ @@ -454,6 +460,8 @@ struct kfd_process_device { /* The device that owns this data. */ struct kfd_dev *dev; + /* The process that owns this kfd_process_device. */ + struct kfd_process *process; /* per-process-per device QCM data structure */ struct qcm_process_device qpd; @@ -469,10 +477,12 @@ struct kfd_process_device { /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ enum kfd_pdd_bound bound; - /* This flag tells if we should reset all - * wavefronts on process termination + /* Flag used to tell the pdd has dequeued from the dqm. + * This is used to prevent dev->dqm->ops.process_termination() from + * being called twice when it is already called in IOMMU callback + * function. */ - bool reset_wavefronts; + bool already_dequeued; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) @@ -659,6 +669,8 @@ struct process_queue_node { struct list_head process_queue_list; }; +void kfd_process_dequeue_from_device(struct kfd_process_device *pdd); +void kfd_process_dequeue_from_all_devices(struct kfd_process *p); int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p); void pqm_uninit(struct process_queue_manager *pqm); int pqm_create_queue(struct process_queue_manager *pqm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 1325f88591ae..3ccb3b53216e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -171,9 +171,6 @@ static void kfd_process_wq_release(struct work_struct *work) pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n", pdd->dev->id, p->pasid); - if (pdd->reset_wavefronts) - dbgdev_wave_reset_wavefronts(pdd->dev, p); - if (pdd->bound == PDD_BOUND) amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); @@ -237,24 +234,17 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, mutex_lock(&p->mutex); - /* In case our notifier is called before IOMMU notifier */ + kfd_process_dequeue_from_all_devices(p); pqm_uninit(&p->pqm); /* Iterate over all process device data structure and check - * if we should delete debug managers and reset all wavefronts + * if we should delete debug managers */ - list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + list_for_each_entry(pdd, &p->per_device_data, per_device_list) if ((pdd->dev->dbgmgr) && (pdd->dev->dbgmgr->pasid == p->pasid)) kfd_dbgmgr_destroy(pdd->dev->dbgmgr); - if (pdd->reset_wavefronts) { - pr_warn("Resetting all wave fronts\n"); - dbgdev_wave_reset_wavefronts(pdd->dev, p); - pdd->reset_wavefronts = false; - } - } - mutex_unlock(&p->mutex); /* @@ -368,8 +358,9 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, INIT_LIST_HEAD(&pdd->qpd.queues_list); INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); pdd->qpd.dqm = dev->dqm; - pdd->reset_wavefronts = false; + pdd->process = p; pdd->bound = PDD_UNBOUND; + pdd->already_dequeued = false; list_add(&pdd->per_device_list, &p->per_device_data); } @@ -498,19 +489,12 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) kfd_dbgmgr_destroy(dev->dbgmgr); - pqm_uninit(&p->pqm); - pdd = kfd_get_process_device_data(dev, p); - - if (!pdd) { - mutex_unlock(&p->mutex); - return; - } - - if (pdd->reset_wavefronts) { - dbgdev_wave_reset_wavefronts(pdd->dev, p); - pdd->reset_wavefronts = false; - } + if (pdd) + /* For GPU relying on IOMMU, we need to dequeue here + * when PASID is still bound. + */ + kfd_process_dequeue_from_device(pdd); mutex_unlock(&p->mutex); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 68fe0d99f6c2..31ec3ca9d46d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -63,6 +63,25 @@ static int find_available_queue_slot(struct process_queue_manager *pqm, return 0; } +void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) +{ + struct kfd_dev *dev = pdd->dev; + + if (pdd->already_dequeued) + return; + + dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); + pdd->already_dequeued = true; +} + +void kfd_process_dequeue_from_all_devices(struct kfd_process *p) +{ + struct kfd_process_device *pdd; + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) + kfd_process_dequeue_from_device(pdd); +} + int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) { INIT_LIST_HEAD(&pqm->queues); @@ -78,21 +97,14 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) void pqm_uninit(struct process_queue_manager *pqm) { - int retval; struct process_queue_node *pqn, *next; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { - retval = pqm_destroy_queue( - pqm, - (pqn->q != NULL) ? - pqn->q->properties.queue_id : - pqn->kq->queue->properties.queue_id); - - if (retval != 0) { - pr_err("failed to destroy queue\n"); - return; - } + uninit_queue(pqn->q); + list_del(&pqn->process_queue_list); + kfree(pqn); } + kfree(pqm->queue_slot_bitmap); pqm->queue_slot_bitmap = NULL; } @@ -290,9 +302,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (pqn->q) { dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); - if (retval != 0) - return retval; - uninit_queue(pqn->q); } From e6f791b1b068b168c6f5203f29040b972d7fbc20 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 27 Sep 2017 00:09:53 -0400 Subject: [PATCH 14/17] drm/amdkfd: Compress unnecessary function parameters Signed-off-by: Yong Zhao Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 +-- drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 5 +++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 -- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 10 +++------- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 660b3fbade41..0ef82b229754 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -282,8 +282,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, p->pasid, dev->id); - err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, - 0, q_properties.type, &queue_id); + err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id); if (err != 0) goto err_create_queue; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index 7d5635fdd6e7..c407f6bd9956 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -184,9 +184,10 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) struct kernel_queue *kq = NULL; int status; + properties.type = KFD_QUEUE_TYPE_DIQ; + status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, - &properties, 0, KFD_QUEUE_TYPE_DIQ, - &qid); + &properties, &qid); if (status) { pr_err("Failed to create DIQ\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 2e13f5d46d25..7f141730de41 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -677,8 +677,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct kfd_dev *dev, struct file *f, struct queue_properties *properties, - unsigned int flags, - enum kfd_queue_type type, unsigned int *qid); int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 31ec3ca9d46d..63c569b62cee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -142,20 +142,17 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct kfd_dev *dev, struct file *f, struct queue_properties *properties, - unsigned int flags, - enum kfd_queue_type type, unsigned int *qid) { int retval; struct kfd_process_device *pdd; - struct queue_properties q_properties; struct queue *q; struct process_queue_node *pqn; struct kernel_queue *kq; int num_queues = 0; struct queue *cur; + enum kfd_queue_type type = properties->type; - memcpy(&q_properties, properties, sizeof(struct queue_properties)); q = NULL; kq = NULL; @@ -206,7 +203,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; } - retval = create_cp_queue(pqm, dev, &q, &q_properties, f, *qid); + retval = create_cp_queue(pqm, dev, &q, properties, f, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -243,9 +240,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, list_add(&pqn->process_queue_list, &pqm->queues); if (q) { - *properties = q->properties; pr_debug("PQM done creating queue\n"); - print_queue_properties(properties); + print_queue_properties(&q->properties); } return retval; From bc920fd4f4350a2e3094c165a77798d721f39e7b Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 27 Sep 2017 00:09:54 -0400 Subject: [PATCH 15/17] drm/amdkfd: Clean up process queue management Removed unused num_concurrent_processes. Implemented counting of queues in QPD. This makes counting the queue list repeatedly in several places unnecessary. Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 5 +++++ drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 7 +------ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 - drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 9 ++------- 4 files changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index dd7e44593cfe..c0685cd64d12 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -189,6 +189,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, } list_add(&q->list, &qpd->queues_list); + qpd->queue_count++; if (q->properties.is_active) dqm->queue_count++; @@ -347,6 +348,7 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, deallocate_vmid(dqm, qpd, q); } + qpd->queue_count--; if (q->properties.is_active) dqm->queue_count--; @@ -856,6 +858,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, goto out; list_add(&q->list, &qpd->queues_list); + qpd->queue_count++; if (q->properties.is_active) { dqm->queue_count++; retval = execute_queues_cpsch(dqm, @@ -1014,6 +1017,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, dqm->sdma_queue_count--; list_del(&q->list); + qpd->queue_count--; if (q->properties.is_active) dqm->queue_count--; @@ -1204,6 +1208,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, goto out; } list_del(&q->list); + qpd->queue_count--; mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index e5a15babb4f3..5d1770e577e9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -140,8 +140,6 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, struct qcm_process_device *qpd) { struct pm4_mes_map_process *packet; - struct queue *cur; - uint32_t num_queues; packet = (struct pm4_mes_map_process *)buffer; @@ -156,10 +154,7 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, packet->bitfields10.gds_size = qpd->gds_size; packet->bitfields10.num_gws = qpd->num_gws; packet->bitfields10.num_oac = qpd->num_oac; - num_queues = 0; - list_for_each_entry(cur, &qpd->queues_list, list) - num_queues++; - packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : num_queues; + packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; packet->sh_mem_config = qpd->sh_mem_config; packet->sh_mem_bases = qpd->sh_mem_bases; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 7f141730de41..7d86ec9790d3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -405,7 +405,6 @@ struct scheduling_resources { struct process_queue_manager { /* data */ struct kfd_process *process; - unsigned int num_concurrent_processes; struct list_head queues; unsigned long *queue_slot_bitmap; }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 63c569b62cee..88ad178bffb6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -149,8 +149,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct queue *q; struct process_queue_node *pqn; struct kernel_queue *kq; - int num_queues = 0; - struct queue *cur; enum kfd_queue_type type = properties->type; q = NULL; @@ -168,11 +166,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, * If we are just about to create DIQ, the is_debug flag is not set yet * Hence we also check the type as well */ - if ((pdd->qpd.is_debug) || - (type == KFD_QUEUE_TYPE_DIQ)) { - list_for_each_entry(cur, &pdd->qpd.queues_list, list) - num_queues++; - if (num_queues >= dev->device_info->max_no_of_hqd/2) + if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ)) { + if (pdd->qpd.queue_count >= dev->device_info->max_no_of_hqd/2) return -ENOSPC; } From 36c2d7eb5e99a4d765b1ec241823d563c71b1125 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 27 Sep 2017 00:09:55 -0400 Subject: [PATCH 16/17] drm/amdkfd: Limit queue number per process and device to 127 HWS uses bit 7 in the queue number of the map process packet for an undocumented feature. Therefore the queue number per process and device must be 127 or less. Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 88ad178bffb6..5129dc139219 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -150,6 +150,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct process_queue_node *pqn; struct kernel_queue *kq; enum kfd_queue_type type = properties->type; + unsigned int max_queues = 127; /* HWS limit */ q = NULL; kq = NULL; @@ -166,10 +167,11 @@ int pqm_create_queue(struct process_queue_manager *pqm, * If we are just about to create DIQ, the is_debug flag is not set yet * Hence we also check the type as well */ - if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ)) { - if (pdd->qpd.queue_count >= dev->device_info->max_no_of_hqd/2) - return -ENOSPC; - } + if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ)) + max_queues = dev->device_info->max_no_of_hqd/2; + + if (pdd->qpd.queue_count >= max_queues) + return -ENOSPC; retval = find_available_queue_slot(pqm, qid); if (retval != 0) From e139cd2a2ff842bd4a5d089b9d251ca62f58ecac Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Wed, 27 Sep 2017 00:09:56 -0400 Subject: [PATCH 17/17] drm/amdkfd: Improve multiple SDMA queues support per process HWS does not support over-subscription and the scheduler can not internally modify the engine. Driver needs to program the correct engine ID. Fix the queue and engine selection to create queues on alternating SDMA engines. This allows concurrent bi-directional DMA transfers in a process that creates two SDMA queues. Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 29 ++++++++++--------- .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 2 +- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c0685cd64d12..da3b74315acf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -622,8 +622,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, if (retval) return retval; - q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; - q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM; + q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; + q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; pr_debug("SDMA id is: %d\n", q->sdma_id); pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); @@ -705,6 +705,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) dqm->queue_count = dqm->processes_count = 0; dqm->sdma_queue_count = 0; dqm->active_runlist = false; + dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; retval = dqm->ops_asic_specific.initialize(dqm); if (retval) mutex_destroy(&dqm->lock); @@ -812,14 +813,6 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, mutex_unlock(&dqm->lock); } -static void select_sdma_engine_id(struct queue *q) -{ - static int sdma_id; - - q->sdma_id = sdma_id; - sdma_id = (sdma_id + 1) % 2; -} - static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd, int *allocate_vmid) { @@ -840,9 +833,15 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, goto out; } - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - select_sdma_engine_id(q); - + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + retval = allocate_sdma_queue(dqm, &q->sdma_id); + if (retval != 0) + goto out; + q->properties.sdma_queue_id = + q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; + q->properties.sdma_engine_id = + q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; + } mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); @@ -1013,8 +1012,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, goto failed; } - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; + deallocate_sdma_queue(dqm, q->sdma_id); + } list_del(&q->list); qpd->queue_count--; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 5d1770e577e9..16da8ad02d8b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -203,7 +203,7 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, queue_type__mes_map_queues__debug_interface_queue_vi; break; case KFD_QUEUE_TYPE_SDMA: - packet->bitfields2.engine_sel = + packet->bitfields2.engine_sel = q->properties.sdma_engine_id + engine_sel__mes_map_queues__sdma0_vi; use_static = false; /* no static queues under SDMA */ break;