From 84d8cc076723058cc294f4360db6ff7758c25b74 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Mon, 6 Apr 2020 13:07:42 +1000 Subject: [PATCH] powerpc/powernv/pci: Add device to iommu group during dma_dev_setup() Historically adding devices to their respective iommu group has been handled by the post-init phb fixup for most devices. This was done because: 1) The IOMMU group is tied to the PE (usually) so we can only setup the iommu groups after we've done resource allocation since BAR location determines the device's PE, and: 2) The sysfs directory for the pci_dev needs to be available since iommu_add_device() wants to add an attribute for the iommu group. However, since commit 30d87ef8b38d ("powerpc/pci: Fix pcibios_setup_device() ordering") both conditions are met when hose->ops->dma_dev_setup() is called so there's no real need to do this in the fixup. Moving the call to iommu_add_device() into pnv_pci_ioda_dma_setup_dev() is a nice cleanup since it puts all the per-device IOMMU setup into one place. It also results in all (non-nvlink) devices getting their iommu group via a common path rather than relying on the bus notifier hack in pnv_tce_iommu_bus_notifier() to handle the adding VFs and hotplugged devices to their group. Signed-off-by: Oliver O'Halloran Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200406030745.24595-5-oohall@gmail.com --- arch/powerpc/platforms/powernv/npu-dma.c | 8 ++++ arch/powerpc/platforms/powernv/pci-ioda.c | 47 +++++++---------------- arch/powerpc/platforms/powernv/pci.c | 20 ---------- 3 files changed, 21 insertions(+), 54 deletions(-) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 4fbbdfa8b327..df27b8d7e78f 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -469,6 +469,12 @@ struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe) compound_group->pgsizes = pe->table_group.pgsizes; } + /* + * The gpu would have been added to the iommu group that's created + * for the PE. Pull it out now. + */ + iommu_del_device(&gpdev->dev); + /* * I'm not sure this is strictly required, but it's probably a good idea * since the table_group for the PE is going to be attached to the @@ -478,7 +484,9 @@ struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe) */ iommu_group_put(pe->table_group.group); + /* now put the GPU into the compound group */ pnv_comp_attach_table_group(npucomp, pe); + iommu_add_device(compound_group, &gpdev->dev); return compound_group; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 9e0776a06e60..ad332948fc9a 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1774,12 +1774,10 @@ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev) WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); pdev->dev.archdata.dma_offset = pe->tce_bypass_base; set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]); - /* - * Note: iommu_add_device() will fail here as - * for physical PE: the device is already added by now; - * for virtual PE: sysfs entries are not ready yet and - * tce_iommu_bus_notifier will add the device to a group later. - */ + + /* PEs with a DMA weight of zero won't have a group */ + if (pe->table_group.group) + iommu_add_device(&pe->table_group, &pdev->dev); } /* @@ -2628,39 +2626,20 @@ static void pnv_pci_ioda_setup_iommu_api(void) struct pnv_ioda_pe *pe; /* - * There are 4 types of PEs: - * - PNV_IODA_PE_BUS: a downstream port with an adapter, - * created from pnv_pci_setup_bridge(); - * - PNV_IODA_PE_BUS_ALL: a PCI-PCIX bridge with devices behind it, - * created from pnv_pci_setup_bridge(); - * - PNV_IODA_PE_VF: a SRIOV virtual function, - * created from pnv_pcibios_sriov_enable(); - * - PNV_IODA_PE_DEV: an NPU or OCAPI device, - * created from pnv_pci_ioda_fixup(). + * For non-nvlink devices the IOMMU group is registered when the PE is + * configured and devices are added to the group when the per-device + * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is + * only initialise for "normal" IODA PHBs. * - * Normally a PE is represented by an IOMMU group, however for - * devices with side channels the groups need to be more strict. + * For NVLink devices we need to ensure the NVLinks and the GPU end up + * in the same IOMMU group, so that's handled here. */ list_for_each_entry(hose, &hose_list, list_node) { phb = hose->private_data; - if (phb->type == PNV_PHB_NPU_NVLINK || - phb->type == PNV_PHB_NPU_OCAPI) - continue; - - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - struct iommu_table_group *table_group; - - table_group = pnv_try_setup_npu_table_group(pe); - if (!table_group) { - if (!pnv_pci_ioda_pe_dma_weight(pe)) - continue; - - table_group = &pe->table_group; - } - pnv_ioda_setup_bus_iommu_group(pe, table_group, - pe->pbus); - } + if (phb->type == PNV_PHB_IODA2) + list_for_each_entry(pe, &phb->ioda.pe_list, list) + pnv_try_setup_npu_table_group(pe); } /* diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 5bf818246339..091fe1cf386b 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -955,28 +955,8 @@ static int pnv_tce_iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct pci_dev *pdev; - struct pci_dn *pdn; - struct pnv_ioda_pe *pe; - struct pci_controller *hose; - struct pnv_phb *phb; switch (action) { - case BUS_NOTIFY_ADD_DEVICE: - pdev = to_pci_dev(dev); - pdn = pci_get_pdn(pdev); - hose = pci_bus_to_host(pdev->bus); - phb = hose->private_data; - - WARN_ON_ONCE(!phb); - if (!pdn || pdn->pe_number == IODA_INVALID_PE || !phb) - return 0; - - pe = &phb->ioda.pe_array[pdn->pe_number]; - if (!pe->table_group.group) - return 0; - iommu_add_device(&pe->table_group, dev); - return 0; case BUS_NOTIFY_DEL_DEVICE: iommu_del_device(dev); return 0;