Merge tag 'drm-next-2020-06-02' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
 "Highlights:

   - Core DRM had a lot of refactoring around managed drm resources to
     make drivers simpler.
   - Intel Tigerlake support is on by default
   - amdgpu now supports p2p PCI buffer sharing and encrypted GPU memory

  Details:

  core:
   - uapi: error out EBUSY when existing master
   - uapi: rework SET/DROP MASTER permission handling
   - remove drm_pci.h - drm_pci* are now legacy
   - introduced managed DRM resources
   - subclassing support for drm_framebuffer
   - simple encoder helper
   - edid improvements
   - vblank + writeback documentation improved
   - drm/mm - optimise tree searches
   - port drivers to use devm_drm_dev_alloc

  dma-buf:
   - add flag for p2p buffer support

  mst:
   - ACT timeout improvements
   - remove drm_dp_mst_has_audio
   - don't use 2nd TX slot - spec recommends against it

  bridge:
   - dw-hdmi various improvements
   - chrontel ch7033 support
   - fix stack issues with old gcc

  hdmi:
   - add unpack function for drm infoframe

  fbdev:
   - misc fbdev driver fixes

  i915:
   - uapi: global sseu pinning
   - uapi: OA buffer polling
   - uapi: remove generated perf code
   - uapi: per-engine default property values in sysfs
   - Tigerlake GEN12 enabled.
   - Lots of gem refactoring
   - Tigerlake enablement patches
   - move to drm_device logging
   - Icelake gamma HW readout
   - push MST link retrain to hotplug work
   - bandwidth atomic helpers
   - ICL fixes
   - RPS/GT refactoring
   - Cherryview full-ppgtt support
   - i915 locking guidelines documented
   - require linear fb stride to be 512 multiple on gen9
   - Tigerlake SAGV support

  amdgpu:
   - uapi: encrypted GPU memory handling
   - uapi: add MEM_SYNC IB flag
   - p2p dma-buf support
   - export VRAM dma-bufs
   - FRU chip access support
   - RAS/SR-IOV updates
   - Powerplay locking fixes
   - VCN DPG (powergating) enablement
   - GFX10 clockgating fixes
   - DC fixes
   - GPU reset fixes
   - navi SDMA fix
   - expose FP16 for modesetting
   - DP 1.4 compliance fixes
   - gfx10 soft recovery
   - Improved Critical Thermal Faults handling
   - resizable BAR on gmc10

  amdkfd:
   - uapi: GWS resource management
   - track GPU memory per process
   - report PCI domain in topology

  radeon:
   - safe reg list generator fixes

  nouveau:
   - HD audio fixes on recent systems
   - vGPU detection (fail probe if we're on one, for now)
   - Interlaced mode fixes (mostly avoidance on Turing, which doesn't
     support it)
   - SVM improvements/fixes
   - NVIDIA format modifier support
   - Misc other fixes

  adv7511:
   - HDMI SPDIF support

  ast:
   - allocate crtc state size
   - fix double assignment
   - fix suspend

  bochs:
   - drop connector register

  cirrus:
   - move to tiny drivers

  exynos:
   - fix imported dma-buf mapping
   - enable runtime PM
   - fixes and cleanups

  mediatek:
   - DPI pin mode swap
   - config mipi_tx current/impedance

  lima:
   - devfreq + cooling device support
   - task handling improvements
   - runtime PM support

  pl111:
   - vexpress init improvements
   - fix module auto-load

  rcar-du:
   - DT bindings conversion to YAML
   - Planes zpos sanity check and fix
   - MAINTAINERS entry for LVDS panel driver

  mcde:
   - fix return value

  mgag200:
   - use managed config init

  stm:
   - read endpoints from DT

  vboxvideo:
   - use PCI managed functions
   - drop WC mtrr

  vkms:
   - enable cursor by default

  rockchip:
   - afbc support

  virtio:
   - various cleanups

  qxl:
   - fix cursor notify port

  hisilicon:
   - 128-byte stride alignment fix

  sun4i:
   - improved format handling"

* tag 'drm-next-2020-06-02' of git://anongit.freedesktop.org/drm/drm: (1401 commits)
  drm/amd/display: Fix potential integer wraparound resulting in a hang
  drm/amd/display: drop cursor position check in atomic test
  drm/amdgpu: fix device attribute node create failed with multi gpu
  drm/nouveau: use correct conflicting framebuffer API
  drm/vblank: Fix -Wformat compile warnings on some arches
  drm/amdgpu: Sync with VM root BO when switching VM to CPU update mode
  drm/amd/display: Handle GPU reset for DC block
  drm/amdgpu: add apu flags (v2)
  drm/amd/powerpay: Disable gfxoff when setting manual mode on picasso and raven
  drm/amdgpu: fix pm sysfs node handling (v2)
  drm/amdgpu: move gpu_info parsing after common early init
  drm/amdgpu: move discovery gfx config fetching
  drm/nouveau/dispnv50: fix runtime pm imbalance on error
  drm/nouveau: fix runtime pm imbalance on error
  drm/nouveau: fix runtime pm imbalance on error
  drm/nouveau/debugfs: fix runtime pm imbalance on error
  drm/nouveau/nouveau/hmm: fix migrate zero page to GPU
  drm/nouveau/nouveau/hmm: fix nouveau_dmem_chunk allocations
  drm/nouveau/kms/nv50-: Share DP SST mode_valid() handling with MST
  drm/nouveau/kms/nv50-: Move 8BPC limit for MST into nv50_mstc_get_modes()
  ...
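The amdkfd uAPI additions in the diff below are easiest to see from user space. Here is a minimal sketch of calling the new GWS allocation ioctl; the struct layout and ioctl number are written out for illustration only and mirror just the fields the handler in the first hunks reads (queue_id, num_gws, first_gws). Real code should include the uapi header <linux/kfd_ioctl.h> instead of redefining these.

/* Sketch: request GWS for an existing user queue via the new ioctl.
 * Struct layout and ioctl number are assumptions for illustration;
 * use the definitions from <linux/kfd_ioctl.h> in real code.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>

struct kfd_ioctl_alloc_queue_gws_args {
	uint32_t queue_id;	/* to KFD: queue to bind GWS to */
	uint32_t num_gws;	/* to KFD: 0 releases the binding */
	uint32_t first_gws;	/* from KFD: currently always 0 */
	uint32_t pad;
};

/* Hypothetical ioctl number; the real one is the
 * AMDKFD_IOC_ALLOC_QUEUE_GWS macro in the uapi header. */
#define AMDKFD_IOC_ALLOC_QUEUE_GWS \
	_IOWR('K', 0x1e, struct kfd_ioctl_alloc_queue_gws_args)

int alloc_queue_gws(int kfd_fd, uint32_t queue_id, uint32_t num_gws)
{
	struct kfd_ioctl_alloc_queue_gws_args args = {
		.queue_id = queue_id,
		.num_gws = num_gws,
	};

	/* Per the handler below: -EINVAL if queue_id is not a user
	 * queue, -ENODEV without GWS hardware or without HWS. */
	if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_QUEUE_GWS, &args) < 0) {
		perror("AMDKFD_IOC_ALLOC_QUEUE_GWS");
		return -1;
	}
	return (int)args.first_gws;
}

Passing num_gws == 0 releases the binding again, which matches the pqm_set_gws(..., NULL) path in the handler.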
@@ -215,6 +215,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 	}
 
 	q_properties->is_interop = false;
+	q_properties->is_gws = false;
 	q_properties->queue_percent = args->queue_percentage;
 	q_properties->priority = args->queue_priority;
 	q_properties->queue_address = args->ring_base_address;
@@ -1322,6 +1323,10 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 		goto err_free;
 	}
 
+	/* Update the VRAM usage count */
+	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
+
 	mutex_unlock(&p->mutex);
 
 	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
@@ -1337,7 +1342,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 	return 0;
 
 err_free:
-	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
 err_unlock:
 	mutex_unlock(&p->mutex);
 	return err;
@@ -1351,6 +1356,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
 	void *mem;
 	struct kfd_dev *dev;
 	int ret;
+	uint64_t size = 0;
 
 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
 	if (!dev)
@@ -1373,7 +1379,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
 	}
 
 	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
-						(struct kgd_mem *)mem);
+						(struct kgd_mem *)mem, &size);
 
 	/* If freeing the buffer failed, leave the handle in place for
 	 * clean-up during process tear-down.
@@ -1382,6 +1388,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
 	kfd_process_device_remove_obj_handle(
 		pdd, GET_IDR_HANDLE(args->handle));
 
+	WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
+
 err_unlock:
 	mutex_unlock(&p->mutex);
 	return ret;
@@ -1584,6 +1592,45 @@ copy_from_user_failed:
 	return err;
 }
 
+static int kfd_ioctl_alloc_queue_gws(struct file *filep,
+		struct kfd_process *p, void *data)
+{
+	int retval;
+	struct kfd_ioctl_alloc_queue_gws_args *args = data;
+	struct queue *q;
+	struct kfd_dev *dev;
+
+	mutex_lock(&p->mutex);
+	q = pqm_get_user_queue(&p->pqm, args->queue_id);
+
+	if (q) {
+		dev = q->device;
+	} else {
+		retval = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (!dev->gws) {
+		retval = -ENODEV;
+		goto out_unlock;
+	}
+
+	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+		retval = -ENODEV;
+		goto out_unlock;
+	}
+
+	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
+	mutex_unlock(&p->mutex);
+
+	args->first_gws = 0;
+	return retval;
+
+out_unlock:
+	mutex_unlock(&p->mutex);
+	return retval;
+}
+
 static int kfd_ioctl_get_dmabuf_info(struct file *filep,
 		struct kfd_process *p, void *data)
 {
@@ -1687,7 +1734,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
 	return 0;
 
 err_free:
-	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
 err_unlock:
 	mutex_unlock(&p->mutex);
 	return r;
@@ -1786,6 +1833,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
 				kfd_ioctl_import_dmabuf, 0),
 
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
+			kfd_ioctl_alloc_queue_gws, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
@@ -502,7 +502,7 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
 	num_nodes = crat_table->num_domains;
 	image_len = crat_table->length;
 
-	pr_info("Parsing CRAT table with %d nodes\n", num_nodes);
+	pr_debug("Parsing CRAT table with %d nodes\n", num_nodes);
 
 	for (node_id = 0; node_id < num_nodes; node_id++) {
 		top_dev = kfd_create_topology_device(device_list);
@@ -569,6 +569,23 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
 	}
 }
 
+static int kfd_gws_init(struct kfd_dev *kfd)
+{
+	int ret = 0;
+
+	if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+		return 0;
+
+	if (hws_gws_support
+		|| (kfd->device_info->asic_family >= CHIP_VEGA10
+			&& kfd->device_info->asic_family <= CHIP_RAVEN
+			&& kfd->mec2_fw_version >= 0x1b3))
+		ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
+				amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);
+
+	return ret;
+}
+
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			 struct drm_device *ddev,
 			 const struct kgd2kfd_shared_resources *gpu_resources)
@@ -578,6 +595,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	kfd->ddev = ddev;
 	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
 			KGD_ENGINE_MEC1);
+	kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
+			KGD_ENGINE_MEC2);
 	kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
 			KGD_ENGINE_SDMA1);
 	kfd->shared_resources = *gpu_resources;
@@ -598,13 +617,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	} else
 		kfd->max_proc_per_quantum = hws_max_conc_proc;
 
-	/* Allocate global GWS that is shared by all KFD processes */
-	if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
-			amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
-		dev_err(kfd_device, "Could not allocate %d gws\n",
-			amdgpu_amdkfd_get_num_gws(kfd->kgd));
-		goto out;
-	}
 	/* calculate max size of mqds needed for queues */
 	size = max_num_of_queues_per_device *
 			kfd->device_info->mqd_size_aligned;
@@ -662,6 +674,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 		goto device_queue_manager_error;
 	}
 
+	/* If supported on this device, allocate global GWS that is shared
+	 * by all KFD processes
+	 */
+	if (kfd_gws_init(kfd)) {
+		dev_err(kfd_device, "Could not allocate %d gws\n",
+			amdgpu_amdkfd_get_num_gws(kfd->kgd));
+		goto gws_error;
+	}
+
 	if (kfd_iommu_device_init(kfd)) {
 		dev_err(kfd_device, "Error initializing iommuv2\n");
 		goto device_iommu_error;
@@ -691,6 +712,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 kfd_topology_add_device_error:
 kfd_resume_error:
 device_iommu_error:
+gws_error:
 	device_queue_manager_uninit(kfd->dqm);
 device_queue_manager_error:
 	kfd_interrupt_exit(kfd);
@@ -701,7 +723,7 @@ kfd_doorbell_error:
 kfd_gtt_sa_init_error:
 	amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
 alloc_gtt_mem_failure:
-	if (hws_gws_support)
+	if (kfd->gws)
 		amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
 	dev_err(kfd_device,
 		"device %x:%x NOT added due to errors\n",
@@ -720,7 +742,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
 		kfd_doorbell_fini(kfd);
 		kfd_gtt_sa_fini(kfd);
 		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
-		if (hws_gws_support)
+		if (kfd->gws)
 			amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
 	}
 
@@ -505,8 +505,13 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
 		deallocate_vmid(dqm, qpd, q);
 	}
 	qpd->queue_count--;
-	if (q->properties.is_active)
+	if (q->properties.is_active) {
 		decrement_queue_count(dqm, q->properties.type);
+		if (q->properties.is_gws) {
+			dqm->gws_queue_count--;
+			qpd->mapped_gws_queue = false;
+		}
+	}
 
 	return retval;
 }
@@ -583,6 +588,20 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 	else if (!q->properties.is_active && prev_active)
 		decrement_queue_count(dqm, q->properties.type);
 
+	if (q->gws && !q->properties.is_gws) {
+		if (q->properties.is_active) {
+			dqm->gws_queue_count++;
+			pdd->qpd.mapped_gws_queue = true;
+		}
+		q->properties.is_gws = true;
+	} else if (!q->gws && q->properties.is_gws) {
+		if (q->properties.is_active) {
+			dqm->gws_queue_count--;
+			pdd->qpd.mapped_gws_queue = false;
+		}
+		q->properties.is_gws = false;
+	}
+
 	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
 		retval = map_queues_cpsch(dqm);
 	else if (q->properties.is_active &&
@@ -631,6 +650,10 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
 				q->properties.type)];
 		q->properties.is_active = false;
 		decrement_queue_count(dqm, q->properties.type);
+		if (q->properties.is_gws) {
+			dqm->gws_queue_count--;
+			qpd->mapped_gws_queue = false;
+		}
 
 		if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
 			continue;
@@ -744,6 +767,10 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 				q->properties.type)];
 		q->properties.is_active = true;
 		increment_queue_count(dqm, q->properties.type);
+		if (q->properties.is_gws) {
+			dqm->gws_queue_count++;
+			qpd->mapped_gws_queue = true;
+		}
 
 		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
 			continue;
@@ -913,6 +940,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
 	INIT_LIST_HEAD(&dqm->queues);
 	dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
 	dqm->active_cp_queue_count = 0;
+	dqm->gws_queue_count = 0;
 
 	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
 		int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -1061,7 +1089,9 @@ static int set_sched_resources(struct device_queue_manager *dqm)
 			break;
 		}
 
-		res.queue_mask |= (1ull << i);
+		res.queue_mask |= 1ull
+			<< amdgpu_queue_mask_bit_to_set_resource_bit(
+				(struct amdgpu_device *)dqm->dev->kgd, i);
 	}
 	res.gws_mask = ~0ull;
 	res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
@@ -1082,7 +1112,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
 	INIT_LIST_HEAD(&dqm->queues);
 	dqm->active_queue_count = dqm->processes_count = 0;
 	dqm->active_cp_queue_count = 0;
-
+	dqm->gws_queue_count = 0;
 	dqm->active_runlist = false;
 	dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
 	dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
@@ -1432,6 +1462,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
 		if (retval == -ETIME)
 			qpd->reset_wavefronts = true;
+		if (q->properties.is_gws) {
+			dqm->gws_queue_count--;
+			qpd->mapped_gws_queue = false;
+		}
 	}
 
 	/*
@@ -1650,8 +1684,13 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
 			deallocate_sdma_queue(dqm, q);
 
-		if (q->properties.is_active)
+		if (q->properties.is_active) {
 			decrement_queue_count(dqm, q->properties.type);
+			if (q->properties.is_gws) {
+				dqm->gws_queue_count--;
+				qpd->mapped_gws_queue = false;
+			}
+		}
 
 		dqm->total_queue_count--;
 	}
@@ -182,6 +182,7 @@ struct device_queue_manager {
 	unsigned int		processes_count;
 	unsigned int		active_queue_count;
 	unsigned int		active_cp_queue_count;
+	unsigned int		gws_queue_count;
 	unsigned int		total_queue_count;
 	unsigned int		next_pipe_to_allocate;
 	unsigned int		*allocated_queues;
@@ -37,7 +37,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
 	vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
 	if (vmid < dev->vm_info.first_vmid_kfd ||
 	    vmid > dev->vm_info.last_vmid_kfd)
-		return 0;
+		return false;
 
 	source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
 	client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
@@ -69,7 +69,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
 
 	/* If there is no valid PASID, it's likely a bug */
 	if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
-		return 0;
+		return false;
 
 	/* Interrupt types we care about: various signals and faults.
 	 * They will be forwarded to a work queue (see below).
@@ -192,7 +192,7 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
 
 	dev_warn_ratelimited(kfd_device,
 			"Invalid PPR device %x:%x.%x pasid 0x%x address 0x%lX flags 0x%X",
-			PCI_BUS_NUM(pdev->devfn),
+			pdev->bus->number,
 			PCI_SLOT(pdev->devfn),
 			PCI_FUNC(pdev->devfn),
 			pasid,
@@ -126,6 +126,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
 
 	prop.queue_size = queue_size;
 	prop.is_interop = false;
+	prop.is_gws = false;
 	prop.priority = 1;
 	prop.queue_percent = 100;
 	prop.type = type;
@@ -41,7 +41,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 				unsigned int *rlib_size,
 				bool *over_subscription)
 {
-	unsigned int process_count, queue_count, compute_queue_count;
+	unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
 	unsigned int map_queue_size;
 	unsigned int max_proc_per_quantum = 1;
 	struct kfd_dev *dev = pm->dqm->dev;
@@ -49,6 +49,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 	process_count = pm->dqm->processes_count;
 	queue_count = pm->dqm->active_queue_count;
 	compute_queue_count = pm->dqm->active_cp_queue_count;
+	gws_queue_count = pm->dqm->gws_queue_count;
 
 	/* check if there is over subscription
 	 * Note: the arbitration between the number of VMIDs and
@@ -61,7 +62,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 		max_proc_per_quantum = dev->max_proc_per_quantum;
 
 	if ((process_count > max_proc_per_quantum) ||
-	    compute_queue_count > get_cp_queues_num(pm->dqm)) {
+	    compute_queue_count > get_cp_queues_num(pm->dqm) ||
+	    gws_queue_count > 1) {
 		*over_subscription = true;
 		pr_debug("Over subscribed runlist\n");
 	}
@@ -43,7 +43,7 @@ static int pm_map_process_v9(struct packet_manager *pm,
 	packet->bitfields2.pasid = qpd->pqm->process->pasid;
 	packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
 	packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
-	packet->bitfields14.num_gws = qpd->num_gws;
+	packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
 	packet->bitfields14.num_oac = qpd->num_oac;
 	packet->bitfields14.sdma_enable = 1;
 	packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
@@ -282,6 +282,7 @@ struct kfd_dev {
 
 	/* Firmware versions */
 	uint16_t mec_fw_version;
+	uint16_t mec2_fw_version;
 	uint16_t sdma_fw_version;
 
 	/* Maximum process number mapped to HW scheduler */
@@ -410,6 +411,10 @@ enum KFD_QUEUE_PRIORITY {
  * @is_active: Defines if the queue is active or not. @is_active and
  * @is_evicted are protected by the DQM lock.
  *
+ * @is_gws: Defines if the queue has been updated to be GWS-capable or not.
+ * @is_gws should be protected by the DQM lock, since changing it can yield the
+ * possibility of updating DQM state on number of GWS queues.
+ *
  * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
  * of the queue.
  *
@@ -432,6 +437,7 @@ struct queue_properties {
 	bool is_interop;
 	bool is_evicted;
 	bool is_active;
+	bool is_gws;
 	/* Not relevant for user mode queues in cp scheduling */
 	unsigned int vmid;
 	/* Relevant only for sdma queues*/
@@ -563,6 +569,14 @@ struct qcm_process_device {
 	 */
 	bool reset_wavefronts;
 
+	/* This flag tells us if this process has a GWS-capable
+	 * queue that will be mapped into the runlist. It's
+	 * possible to request a GWS BO, but not have the queue
+	 * currently mapped, and this changes how the MAP_PROCESS
+	 * PM4 packet is configured.
+	 */
+	bool mapped_gws_queue;
+
 	/*
 	 * All the memory management data should be here too
 	 */
@@ -615,6 +629,8 @@ enum kfd_pdd_bound {
 	PDD_BOUND_SUSPENDED,
 };
 
+#define MAX_VRAM_FILENAME_LEN 11
+
 /* Data that is per-process-per device. */
 struct kfd_process_device {
 	/*
@@ -657,6 +673,11 @@ struct kfd_process_device {
 
 	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
 	enum kfd_pdd_bound bound;
+
+	/* VRAM usage */
+	uint64_t vram_usage;
+	struct attribute attr_vram;
+	char vram_filename[MAX_VRAM_FILENAME_LEN];
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -923,6 +944,8 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
 			void *gws);
 struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
 						unsigned int qid);
+struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
+						unsigned int qid);
 int pqm_get_wave_state(struct process_queue_manager *pqm,
 		       unsigned int qid,
 		       void __user *ctl_stack,
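A sizing note on the MAX_VRAM_FILENAME_LEN 11 definition above: the name written by kfd_procfs_add_vram_usage() further down is "vram_%u", so 11 bytes cover the five-character prefix, up to five decimal digits of GPU id, and the NUL terminator. A stand-alone sketch of that arithmetic (the sample id is made up):

/* Check that an 11-byte buffer fits "vram_<gpuid>" for ids up to
 * five decimal digits; the id below is illustrative only. */
#include <stdio.h>

int main(void)
{
	char name[11];			/* MAX_VRAM_FILENAME_LEN */
	int n = snprintf(name, sizeof(name), "vram_%u", 99999u);

	printf("%s (%d chars + NUL)\n", name, n);	/* vram_99999 (10 chars + NUL) */
	return 0;
}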
@@ -79,18 +79,22 @@ static struct kfd_procfs_tree procfs;
 static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
 			       char *buffer)
 {
-	int val = 0;
-
 	if (strcmp(attr->name, "pasid") == 0) {
 		struct kfd_process *p = container_of(attr, struct kfd_process,
 						     attr_pasid);
-		val = p->pasid;
+
+		return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
+	} else if (strncmp(attr->name, "vram_", 5) == 0) {
+		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
+							      attr_vram);
+		if (pdd)
+			return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
 	} else {
 		pr_err("Invalid attribute");
 		return -EINVAL;
 	}
 
-	return snprintf(buffer, PAGE_SIZE, "%d\n", val);
+	return 0;
 }
 
 static void kfd_procfs_kobj_release(struct kobject *kobj)
@@ -206,6 +210,34 @@ int kfd_procfs_add_queue(struct queue *q)
 	return 0;
 }
 
+int kfd_procfs_add_vram_usage(struct kfd_process *p)
+{
+	int ret = 0;
+	struct kfd_process_device *pdd;
+
+	if (!p)
+		return -EINVAL;
+
+	if (!p->kobj)
+		return -EFAULT;
+
+	/* Create proc/<pid>/vram_<gpuid> file for each GPU */
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+		snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u",
+			 pdd->dev->id);
+		pdd->attr_vram.name = pdd->vram_filename;
+		pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE;
+		sysfs_attr_init(&pdd->attr_vram);
+		ret = sysfs_create_file(p->kobj, &pdd->attr_vram);
+		if (ret)
+			pr_warn("Creating vram usage for gpu id %d failed",
+				(int)pdd->dev->id);
+	}
+
+	return ret;
+}
+
+
 void kfd_procfs_del_queue(struct queue *q)
 {
 	if (!q)
@@ -248,7 +280,7 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
 	struct kfd_dev *dev = pdd->dev;
 
 	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
-	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem);
+	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL);
 }
 
 /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
@@ -312,7 +344,7 @@ sync_memory_failed:
 	return err;
 
 err_map_mem:
-	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem);
+	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL);
 err_alloc_mem:
 	*kptr = NULL;
 	return err;
@@ -411,6 +443,11 @@ struct kfd_process *kfd_create_process(struct file *filep)
 							   process->kobj);
 		if (!process->kobj_queues)
 			pr_warn("Creating KFD proc/queues folder failed");
+
+		ret = kfd_procfs_add_vram_usage(process);
+		if (ret)
+			pr_warn("Creating vram usage file for pid %d failed",
+				(int)process->lead_thread->pid);
 	}
 out:
 	if (!IS_ERR(process))
@@ -488,7 +525,7 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
 				peer_pdd->dev->kgd, mem, peer_pdd->vm);
 		}
 
-		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem);
+		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL);
 		kfd_process_device_remove_obj_handle(pdd, id);
 	}
 }
@@ -551,6 +588,7 @@ static void kfd_process_wq_release(struct work_struct *work)
 {
 	struct kfd_process *p = container_of(work, struct kfd_process,
 					     release_work);
+	struct kfd_process_device *pdd;
 
 	/* Remove the procfs files */
 	if (p->kobj) {
@@ -558,6 +596,10 @@ static void kfd_process_wq_release(struct work_struct *work)
 		kobject_del(p->kobj_queues);
 		kobject_put(p->kobj_queues);
 		p->kobj_queues = NULL;
+
+		list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+			sysfs_remove_file(p->kobj, &pdd->attr_vram);
+
 		kobject_del(p->kobj);
 		kobject_put(p->kobj);
 		p->kobj = NULL;
@@ -858,10 +900,12 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 	pdd->qpd.dqm = dev->dqm;
 	pdd->qpd.pqm = &p->pqm;
 	pdd->qpd.evicted = 0;
+	pdd->qpd.mapped_gws_queue = false;
 	pdd->process = p;
 	pdd->bound = PDD_UNBOUND;
 	pdd->already_dequeued = false;
 	pdd->runtime_inuse = false;
+	pdd->vram_usage = 0;
 	list_add(&pdd->per_device_list, &p->per_device_data);
 
 	/* Init idr used for memory handle translation */
@@ -1078,7 +1122,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
 	return p;
 }
 
-/* process_evict_queues - Evict all user queues of a process
+/* kfd_process_evict_queues - Evict all user queues of a process
 *
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
@@ -1118,7 +1162,7 @@ fail:
 	return r;
 }
 
-/* process_restore_queues - Restore all user queues of a process */
+/* kfd_process_restore_queues - Restore all user queues of a process */
 int kfd_process_restore_queues(struct kfd_process *p)
 {
 	struct kfd_process_device *pdd;
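The procfs entries added above surface per-GPU VRAM usage for each process. A user-space sketch of reading one back (the exact mount point of KFD's proc tree, and the pid and gpuid values, are assumptions; on a real system they come from the running process and the topology):

/* Read a per-process VRAM usage counter exposed above.
 * The path below is an assumption about where KFD's proc
 * tree is mounted; substitute real <pid> and <gpuid>. */
#include <stdio.h>

static long long read_vram_usage(const char *path)
{
	long long bytes = -1;
	FILE *f = fopen(path, "r");

	if (!f)
		return -1;
	/* The kernel side prints one %llu, written with
	 * READ_ONCE(pdd->vram_usage). */
	if (fscanf(f, "%lld", &bytes) != 1)
		bytes = -1;
	fclose(f);
	return bytes;
}

int main(void)
{
	/* Hypothetical pid and gpuid, for illustration only */
	long long b = read_vram_usage("/sys/class/kfd/kfd/proc/4242/vram_33074");

	if (b >= 0)
		printf("VRAM in use: %lld bytes\n", b);
	return 0;
}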
@@ -476,6 +476,15 @@ struct kernel_queue *pqm_get_kernel_queue(
 	return NULL;
 }
 
+struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
+					unsigned int qid)
+{
+	struct process_queue_node *pqn;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	return pqn ? pqn->q : NULL;
+}
+
 int pqm_get_wave_state(struct process_queue_manager *pqm,
 		       unsigned int qid,
 		       void __user *ctl_stack,
@@ -478,6 +478,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			      dev->node_props.device_id);
 		sysfs_show_32bit_prop(buffer, "location_id",
 			      dev->node_props.location_id);
+		sysfs_show_32bit_prop(buffer, "domain",
+			      dev->node_props.domain);
 		sysfs_show_32bit_prop(buffer, "drm_render_minor",
 			      dev->node_props.drm_render_minor);
 		sysfs_show_64bit_prop(buffer, "hive_id",
@@ -787,7 +789,6 @@ static int kfd_topology_update_sysfs(void)
 {
 	int ret;
 
-	pr_info("Creating topology SYSFS entries\n");
 	if (!sys_props.kobj_topology) {
 		sys_props.kobj_topology =
 			kfd_alloc_struct(sys_props.kobj_topology);
@@ -1048,7 +1049,6 @@ int kfd_topology_init(void)
 		sys_props.generation_count++;
 		kfd_update_system_properties();
 		kfd_debug_print_topology();
-		pr_info("Finished initializing topology\n");
 	} else
 		pr_err("Failed to update topology in sysfs ret=%d\n", ret);
 
@@ -1303,7 +1303,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 
 	dev->node_props.vendor_id = gpu->pdev->vendor;
 	dev->node_props.device_id = gpu->pdev->device;
+	dev->node_props.capability |=
+		((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) <<
+			HSA_CAP_ASIC_REVISION_SHIFT) &
+		HSA_CAP_ASIC_REVISION_MASK);
 	dev->node_props.location_id = pci_dev_id(gpu->pdev);
+	dev->node_props.domain = pci_domain_nr(gpu->pdev->bus);
 	dev->node_props.max_engine_clk_fcompute =
 		amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd);
 	dev->node_props.max_engine_clk_ccompute =
@@ -1317,7 +1322,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 			gpu->device_info->num_xgmi_sdma_engines;
 	dev->node_props.num_sdma_queues_per_engine =
 			gpu->device_info->num_sdma_queues_per_engine;
-	dev->node_props.num_gws = (hws_gws_support &&
+	dev->node_props.num_gws = (dev->gpu->gws &&
 			dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
 			amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
 	dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);
@@ -41,7 +41,6 @@
 #define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT	8
 #define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK	0x00003000
 #define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT	12
-#define HSA_CAP_RESERVED			0xffffc000
 
 #define HSA_CAP_DOORBELL_TYPE_PRE_1_0	0x0
 #define HSA_CAP_DOORBELL_TYPE_1_0	0x1
@@ -51,6 +50,10 @@
 #define HSA_CAP_SRAM_EDCSUPPORTED	0x00080000
 #define HSA_CAP_MEM_EDCSUPPORTED	0x00100000
 #define HSA_CAP_RASEVENTNOTIFY		0x00200000
+#define HSA_CAP_ASIC_REVISION_MASK	0x03c00000
+#define HSA_CAP_ASIC_REVISION_SHIFT	22
+
+#define HSA_CAP_RESERVED		0xfc078000
 
 struct kfd_node_properties {
 	uint64_t hive_id;
@@ -77,6 +80,7 @@ struct kfd_node_properties {
 	uint32_t vendor_id;
 	uint32_t device_id;
 	uint32_t location_id;
+	uint32_t domain;
 	uint32_t max_engine_clk_fcompute;
 	uint32_t max_engine_clk_ccompute;
 	int32_t drm_render_minor;
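With the topology hunks above, each node gains a "domain" property and the capability word carries the ASIC revision in bits 22-25 (mask 0x03c00000, shift 22, per the header hunk). A small sketch of decoding the revision from a capability value read out of sysfs:

/* Decode the ASIC revision packed into the HSA capability word,
 * using the mask/shift added in the topology header above. */
#include <stdint.h>
#include <stdio.h>

#define HSA_CAP_ASIC_REVISION_MASK	0x03c00000
#define HSA_CAP_ASIC_REVISION_SHIFT	22

int main(void)
{
	/* Example capability value; real code would parse it from a
	 * node's properties file under KFD's topology sysfs tree
	 * (the exact path is an assumption about the sysfs layout). */
	uint32_t capability = 0x00c08000;
	uint32_t rev = (capability & HSA_CAP_ASIC_REVISION_MASK)
			>> HSA_CAP_ASIC_REVISION_SHIFT;

	printf("ASIC revision: %u\n", rev);	/* prints 3 for this value */
	return 0;
}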