linux/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
Linus Torvalds 93b694d096 drm next for 5.10-rc1
New driver:
 Cadence MHDP8546 DisplayPort bridge driver
 
 core:
 - cross-driver scatterlist cleanups
 - devm_drm conversions
 - remove drm_dev_init
 - devm_drm_dev_alloc conversion
 
 ttm:
 - lots of refactoring and cleanups
 
 bridges:
 - chained bridge support in more drivers
 
 panel:
 - misc new panels
 
 scheduler:
 - cleanup priority levels
 
 displayport:
 - refactor i915 code into helpers for nouveau
 
 i915:
 - split into display and GT trees
 - WW locking refactoring in GEM
 - execbuf2 extension mechanism
 - syncobj timeline support
 - GEN 12 HOBL display powersaving
 - Rocket Lake display additions
 - Disable FBC on Tigerlake
 - Tigerlake Type-C + DP improvements
 - Hotplug interrupt refactoring
 
 amdgpu:
 - Sienna Cichlid updates
 - Navy Flounder updates
 - DCE6 (SI) support for DC
 - Plane rotation enabled
 - TMZ state info ioctl
 - PCIe DPC recovery support
 - DC interrupt handling refactor
 - OLED panel fixes
 
 amdkfd:
 - add SMI events for thermal throttling
 - SMI interface events ioctl update
 - process eviction counters
 
 radeon:
 - move to dma_ for allocations
 - expose sclk via sysfs
 
 msm:
 - DSI support for sm8150/sm8250
 - per-process GPU pagetable support
 - Displayport support
 
 mediatek:
 - move HDMI phy driver to PHY
 - convert mtk-dpi to bridge API
 - disable mt2701 tmds
 
 tegra:
 - bridge support
 
 exynos:
 - misc cleanups
 
 vc4:
 - dual display cleanups
 
 ast:
 - cleanups
 
 gma500:
 - conversion to GPIOd API
 
 hisilicon:
 - misc reworks
 
 ingenic:
 - clock handling and format improvements
 
 mcde:
 - DSI support
 
 mgag200:
 - desktop g200 support
 
 mxsfb:
 - i.MX7 + i.MX8M
 - alpha plane support
 
 panfrost:
 - devfreq support
 - amlogic SoC support
 
 ps8640:
 - EDID from eDP retrieval
 
 tidss:
 - AM65xx YUV workaround
 
 virtio:
 - virtio-gpu exported resources
 
 rcar-du:
 - R8A7742, R8A774E1 and R8A77961 support
 - YUV planar format fixes
 - non-visible plane handling
 - VSP device reference count fix
 - Kconfig fix to avoid displaying disabled options in .config
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJfh579AAoJEAx081l5xIa+GqoP/0amz+ZN7y/L7+f32CRinJ7/
 3e4xjXNDmtWG4Whe/WKjlYmbAcvSdWV/4HYpurW2BFJnOAB/5lIqYcS/PyqErPzA
 w4EpRoJ+ZdFgmlDH0vdsDwPLT/HFmhUN9AopNkoZpbSMxrManSj5QgmePXyiKReP
 Q+ZAK5UW5AdOVY4bgXUSEkVq2eilCLXf+bSBR/LrVQuNgu7GULX8SIy/Y1CuMtv8
 LgzzjLKfIZaIWC+F/RU7BxJ7YnrVq7z7yXnUx8j2416+k/Wwe+BeSUCSZstT7q9G
 UkX8jWfR7ZKqhwP+UQeSwDbHkALz7lv88nyjQdxJZ3SrXRe4hy14YjxnR4maeNAj
 3TAYSdcAMWyRHqeEZIZ7Hj5sQtTq5OZAoIjxzH3vpVdAnnAkcWoF77pqxV8XPqTC
 nw40DihAxQOshGwMkjd5DqkEwnMv43Hs1WTVYu9dPTOfOdqPNt+Vqp7Xl9Z46+kV
 k6PDcx60T9ayDW1QZ6MoIXHta9E7ixzu7gYBL3vP4LuporY0uNG3bzF3CMvof1BK
 sHYcYTdZkqbTD2d6rHV+TbpPQXgTtlej9qVlQM4SeX37Xtc7LxCYpnpUHKz2S/fK
 1vyeGPgdytHblwlxwZOPZ4R2I/HTfnITdr4kMcJHhxAsEewfW1Rd4+stQqVJ2Mph
 Vz+CFP2BngivGFz5vuky
 =4H8J
 -----END PGP SIGNATURE-----

Merge tag 'drm-next-2020-10-15' of git://anongit.freedesktop.org/drm/drm

Pull drm updates from Dave Airlie:
 "Not a major amount of change, the i915 trees got split into display
  and gt trees to better facilitate higher level review, and there's a
  major refactoring of i915 GEM locking to use more core kernel concepts
  (like ww-mutexes). msm gets per-process pagetables, older AMD SI cards
  get DC support, nouveau got a bump in displayport support with common
  code extraction from i915.

  Outside of drm this contains a couple of patches for hexint
  moduleparams which you've acked, and a virtio common code tree that
  you should also get via it's regular path.

  New driver:
   - Cadence MHDP8546 DisplayPort bridge driver

  core:
   - cross-driver scatterlist cleanups
   - devm_drm conversions
   - remove drm_dev_init
   - devm_drm_dev_alloc conversion

  ttm:
   - lots of refactoring and cleanups

  bridges:
   - chained bridge support in more drivers

  panel:
   - misc new panels

  scheduler:
   - cleanup priority levels

  displayport:
   - refactor i915 code into helpers for nouveau

  i915:
   - split into display and GT trees
   - WW locking refactoring in GEM
   - execbuf2 extension mechanism
   - syncobj timeline support
   - GEN 12 HOBL display powersaving
   - Rocket Lake display additions
   - Disable FBC on Tigerlake
   - Tigerlake Type-C + DP improvements
   - Hotplug interrupt refactoring

  amdgpu:
   - Sienna Cichlid updates
   - Navy Flounder updates
   - DCE6 (SI) support for DC
   - Plane rotation enabled
   - TMZ state info ioctl
   - PCIe DPC recovery support
   - DC interrupt handling refactor
   - OLED panel fixes

  amdkfd:
   - add SMI events for thermal throttling
   - SMI interface events ioctl update
   - process eviction counters

  radeon:
   - move to dma_ for allocations
   - expose sclk via sysfs

  msm:
   - DSI support for sm8150/sm8250
   - per-process GPU pagetable support
   - Displayport support

  mediatek:
   - move HDMI phy driver to PHY
   - convert mtk-dpi to bridge API
   - disable mt2701 tmds

  tegra:
   - bridge support

  exynos:
   - misc cleanups

  vc4:
   - dual display cleanups

  ast:
   - cleanups

  gma500:
   - conversion to GPIOd API

  hisilicon:
   - misc reworks

  ingenic:
   - clock handling and format improvements

  mcde:
   - DSI support

  mgag200:
   - desktop g200 support

  mxsfb:
   - i.MX7 + i.MX8M
   - alpha plane support

  panfrost:
   - devfreq support
   - amlogic SoC support

  ps8640:
   - EDID from eDP retrieval

  tidss:
   - AM65xx YUV workaround

  virtio:
   - virtio-gpu exported resources

  rcar-du:
   - R8A7742, R8A774E1 and R8A77961 support
   - YUV planar format fixes
   - non-visible plane handling
   - VSP device reference count fix
   - Kconfig fix to avoid displaying disabled options in .config"

* tag 'drm-next-2020-10-15' of git://anongit.freedesktop.org/drm/drm: (1494 commits)
  drm/ingenic: Fix bad revert
  drm/amdgpu: Fix invalid number of character '{' in amdgpu_acpi_init
  drm/amdgpu: Remove warning for virtual_display
  drm/amdgpu: kfd_initialized can be static
  drm/amd/pm: setup APU dpm clock table in SMU HW initialization
  drm/amdgpu: prevent spurious warning
  drm/amdgpu/swsmu: fix ARC build errors
  drm/amd/display: Fix OPTC_DATA_FORMAT programming
  drm/amd/display: Don't allow pstate if no support in blank
  drm/panfrost: increase readl_relaxed_poll_timeout values
  MAINTAINERS: Update entry for st7703 driver after the rename
  Revert "gpu/drm: ingenic: Add option to mmap GEM buffers cached"
  drm/amd/display: HDMI remote sink need mode validation for Linux
  drm/amd/display: Change to correct unit on audio rate
  drm/amd/display: Avoid set zero in the requested clk
  drm/amdgpu: align frag_end to covered address space
  drm/amdgpu: fix NULL pointer dereference for Renoir
  drm/vmwgfx: fix regression in thp code due to ttm init refactor.
  drm/amdgpu/swsmu: add interrupt work handler for smu11 parts
  drm/amdgpu/swsmu: add interrupt work function
  ...
2020-10-15 10:46:16 -07:00

358 lines
9.2 KiB
C

/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/printk.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/amd-iommu.h>
#include "kfd_priv.h"
#include "kfd_dbgmgr.h"
#include "kfd_topology.h"
#include "kfd_iommu.h"
static const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
/** kfd_iommu_check_device - Check whether IOMMU is available for device
*/
int kfd_iommu_check_device(struct kfd_dev *kfd)
{
struct amd_iommu_device_info iommu_info;
int err;
if (!kfd->use_iommu_v2)
return -ENODEV;
iommu_info.flags = 0;
err = amd_iommu_device_info(kfd->pdev, &iommu_info);
if (err)
return err;
if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags)
return -ENODEV;
return 0;
}
/** kfd_iommu_device_init - Initialize IOMMU for device
*/
int kfd_iommu_device_init(struct kfd_dev *kfd)
{
struct amd_iommu_device_info iommu_info;
unsigned int pasid_limit;
int err;
if (!kfd->use_iommu_v2)
return 0;
iommu_info.flags = 0;
err = amd_iommu_device_info(kfd->pdev, &iommu_info);
if (err < 0) {
dev_err(kfd_device,
"error getting iommu info. is the iommu enabled?\n");
return -ENODEV;
}
if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
dev_err(kfd_device,
"error required iommu flags ats %i, pri %i, pasid %i\n",
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
!= 0);
return -ENODEV;
}
pasid_limit = min_t(unsigned int,
(unsigned int)(1 << kfd->device_info->max_pasid_bits),
iommu_info.max_pasids);
if (!kfd_set_pasid_limit(pasid_limit)) {
dev_err(kfd_device, "error setting pasid limit\n");
return -EBUSY;
}
return 0;
}
/** kfd_iommu_bind_process_to_device - Have the IOMMU bind a process
*
* Binds the given process to the given device using its PASID. This
* enables IOMMUv2 address translation for the process on the device.
*
* This function assumes that the process mutex is held.
*/
int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
{
struct kfd_dev *dev = pdd->dev;
struct kfd_process *p = pdd->process;
int err;
if (!dev->use_iommu_v2 || pdd->bound == PDD_BOUND)
return 0;
if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
return -EINVAL;
}
err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
if (!err)
pdd->bound = PDD_BOUND;
return err;
}
/** kfd_iommu_unbind_process - Unbind process from all devices
*
* This removes all IOMMU device bindings of the process. To be used
* before process termination.
*/
void kfd_iommu_unbind_process(struct kfd_process *p)
{
struct kfd_process_device *pdd;
list_for_each_entry(pdd, &p->per_device_data, per_device_list)
if (pdd->bound == PDD_BOUND)
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
}
/* Callback for process shutdown invoked by the IOMMU driver */
static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, u32 pasid)
{
struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);
struct kfd_process *p;
struct kfd_process_device *pdd;
if (!dev)
return;
/*
* Look for the process that matches the pasid. If there is no such
* process, we either released it in amdkfd's own notifier, or there
* is a bug. Unfortunately, there is no way to tell...
*/
p = kfd_lookup_process_by_pasid(pasid);
if (!p)
return;
pr_debug("Unbinding process 0x%x from IOMMU\n", pasid);
mutex_lock(kfd_get_dbgmgr_mutex());
if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
kfd_dbgmgr_destroy(dev->dbgmgr);
dev->dbgmgr = NULL;
}
}
mutex_unlock(kfd_get_dbgmgr_mutex());
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(dev, p);
if (pdd)
/* For GPU relying on IOMMU, we need to dequeue here
* when PASID is still bound.
*/
kfd_process_dequeue_from_device(pdd);
mutex_unlock(&p->mutex);
kfd_unref_process(p);
}
/* This function called by IOMMU driver on PPR failure */
static int iommu_invalid_ppr_cb(struct pci_dev *pdev, u32 pasid,
unsigned long address, u16 flags)
{
struct kfd_dev *dev;
dev_warn_ratelimited(kfd_device,
"Invalid PPR device %x:%x.%x pasid 0x%x address 0x%lX flags 0x%X",
pdev->bus->number,
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn),
pasid,
address,
flags);
dev = kfd_device_by_pci_dev(pdev);
if (!WARN_ON(!dev))
kfd_signal_iommu_event(dev, pasid, address,
flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
return AMD_IOMMU_INV_PRI_RSP_INVALID;
}
/*
* Bind processes do the device that have been temporarily unbound
* (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device.
*/
static int kfd_bind_processes_to_device(struct kfd_dev *kfd)
{
struct kfd_process_device *pdd;
struct kfd_process *p;
unsigned int temp;
int err = 0;
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(kfd, p);
if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) {
mutex_unlock(&p->mutex);
continue;
}
err = amd_iommu_bind_pasid(kfd->pdev, p->pasid,
p->lead_thread);
if (err < 0) {
pr_err("Unexpected pasid 0x%x binding failure\n",
p->pasid);
mutex_unlock(&p->mutex);
break;
}
pdd->bound = PDD_BOUND;
mutex_unlock(&p->mutex);
}
srcu_read_unlock(&kfd_processes_srcu, idx);
return err;
}
/*
* Mark currently bound processes as PDD_BOUND_SUSPENDED. These
* processes will be restored to PDD_BOUND state in
* kfd_bind_processes_to_device.
*/
static void kfd_unbind_processes_from_device(struct kfd_dev *kfd)
{
struct kfd_process_device *pdd;
struct kfd_process *p;
unsigned int temp;
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(kfd, p);
if (WARN_ON(!pdd)) {
mutex_unlock(&p->mutex);
continue;
}
if (pdd->bound == PDD_BOUND)
pdd->bound = PDD_BOUND_SUSPENDED;
mutex_unlock(&p->mutex);
}
srcu_read_unlock(&kfd_processes_srcu, idx);
}
/** kfd_iommu_suspend - Prepare IOMMU for suspend
*
* This unbinds processes from the device and disables the IOMMU for
* the device.
*/
void kfd_iommu_suspend(struct kfd_dev *kfd)
{
if (!kfd->use_iommu_v2)
return;
kfd_unbind_processes_from_device(kfd);
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
amd_iommu_free_device(kfd->pdev);
}
/** kfd_iommu_resume - Restore IOMMU after resume
*
* This reinitializes the IOMMU for the device and re-binds previously
* suspended processes to the device.
*/
int kfd_iommu_resume(struct kfd_dev *kfd)
{
unsigned int pasid_limit;
int err;
if (!kfd->use_iommu_v2)
return 0;
pasid_limit = kfd_get_pasid_limit();
err = amd_iommu_init_device(kfd->pdev, pasid_limit);
if (err)
return -ENXIO;
amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
iommu_pasid_shutdown_callback);
amd_iommu_set_invalid_ppr_cb(kfd->pdev,
iommu_invalid_ppr_cb);
err = kfd_bind_processes_to_device(kfd);
if (err) {
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
amd_iommu_free_device(kfd->pdev);
return err;
}
return 0;
}
extern bool amd_iommu_pc_supported(void);
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
/** kfd_iommu_add_perf_counters - Add IOMMU performance counters to topology
*/
int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
{
struct kfd_perf_properties *props;
if (!(kdev->node_props.capability & HSA_CAP_ATS_PRESENT))
return 0;
if (!amd_iommu_pc_supported())
return 0;
props = kfd_alloc_struct(props);
if (!props)
return -ENOMEM;
strcpy(props->block_name, "iommu");
props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
amd_iommu_pc_get_max_counters(0); /* assume one iommu */
list_add_tail(&props->list, &kdev->perf_props);
return 0;
}