linux/arch/s390/pci/pci_bus.c

380 lines
8.6 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2020
*
* Author(s):
* Pierre Morel <pmorel@linux.ibm.com>
*
*/
#define KMSG_COMPONENT "zpci"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/jump_label.h>
#include <linux/pci.h>
#include <linux/printk.h>
#include <asm/pci_clp.h>
#include <asm/pci_dma.h>
#include "pci_bus.h"
#include "pci_iov.h"
/* List of all allocated zbusses, linked through zpci_bus::bus_next */
static LIST_HEAD(zbus_list);
/* Taken around every traversal and modification of zbus_list in this file */
static DEFINE_MUTEX(zbus_list_lock);
/*
 * Number of zPCI functions currently registered with a zbus; checked against
 * ZPCI_NR_DEVICES before a new function is registered.
 */
static int zpci_nb_devices;
/* zpci_bus_prepare_device - Prepare a zPCI function for scanning
* @zdev: the zPCI function to be prepared
*
* The PCI resources for the function are set up and added to its zbus and the
* function is enabled. The function must be added to a zbus which must have
* a PCI bus created. If an error occurs the zPCI function is not enabled.
*
* Return: 0 on success, an error code otherwise
*/
static int zpci_bus_prepare_device(struct zpci_dev *zdev)
{
PCI: s390: Fix use-after-free of PCI resources with per-function hotplug On s390 PCI functions may be hotplugged individually even when they belong to a multi-function device. In particular on an SR-IOV device VFs may be removed and later re-added. In commit a50297cf8235 ("s390/pci: separate zbus creation from scanning") it was missed however that struct pci_bus and struct zpci_bus's resource list retained a reference to the PCI functions MMIO resources even though those resources are released and freed on hot-unplug. These stale resources may subsequently be claimed when the PCI function re-appears resulting in use-after-free. One idea of fixing this use-after-free in s390 specific code that was investigated was to simply keep resources around from the moment a PCI function first appeared until the whole virtual PCI bus created for a multi-function device disappears. The problem with this however is that due to the requirement of artificial MMIO addreesses (address cookies) extra logic is then needed to keep the address cookies compatible on re-plug. At the same time the MMIO resources semantically belong to the PCI function so tying their lifecycle to the function seems more logical. Instead a simpler approach is to remove the resources of an individually hot-unplugged PCI function from the PCI bus's resource list while keeping the resources of other PCI functions on the PCI bus untouched. This is done by introducing pci_bus_remove_resource() to remove an individual resource. Similarly the resource also needs to be removed from the struct zpci_bus's resource list. It turns out however, that there is really no need to add the MMIO resources to the struct zpci_bus's resource list at all and instead we can simply use the zpci_bar_struct's resource pointer directly. 
Fixes: a50297cf8235 ("s390/pci: separate zbus creation from scanning") Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com> Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com> Acked-by: Bjorn Helgaas <bhelgaas@google.com> Link: https://lore.kernel.org/r/20230306151014.60913-2-schnelle@linux.ibm.com Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2023-03-06 16:10:11 +01:00
int rc, i;
if (!zdev_enabled(zdev)) {
rc = zpci_enable_device(zdev);
if (rc)
return rc;
rc = zpci_dma_init_device(zdev);
if (rc) {
zpci_disable_device(zdev);
return rc;
}
}
if (!zdev->has_resources) {
PCI: s390: Fix use-after-free of PCI resources with per-function hotplug On s390 PCI functions may be hotplugged individually even when they belong to a multi-function device. In particular on an SR-IOV device VFs may be removed and later re-added. In commit a50297cf8235 ("s390/pci: separate zbus creation from scanning") it was missed however that struct pci_bus and struct zpci_bus's resource list retained a reference to the PCI functions MMIO resources even though those resources are released and freed on hot-unplug. These stale resources may subsequently be claimed when the PCI function re-appears resulting in use-after-free. One idea of fixing this use-after-free in s390 specific code that was investigated was to simply keep resources around from the moment a PCI function first appeared until the whole virtual PCI bus created for a multi-function device disappears. The problem with this however is that due to the requirement of artificial MMIO addreesses (address cookies) extra logic is then needed to keep the address cookies compatible on re-plug. At the same time the MMIO resources semantically belong to the PCI function so tying their lifecycle to the function seems more logical. Instead a simpler approach is to remove the resources of an individually hot-unplugged PCI function from the PCI bus's resource list while keeping the resources of other PCI functions on the PCI bus untouched. This is done by introducing pci_bus_remove_resource() to remove an individual resource. Similarly the resource also needs to be removed from the struct zpci_bus's resource list. It turns out however, that there is really no need to add the MMIO resources to the struct zpci_bus's resource list at all and instead we can simply use the zpci_bar_struct's resource pointer directly. 
Fixes: a50297cf8235 ("s390/pci: separate zbus creation from scanning") Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com> Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com> Acked-by: Bjorn Helgaas <bhelgaas@google.com> Link: https://lore.kernel.org/r/20230306151014.60913-2-schnelle@linux.ibm.com Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
2023-03-06 16:10:11 +01:00
zpci_setup_bus_resources(zdev);
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
if (zdev->bars[i].res)
pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res, 0);
}
}
return 0;
}
/* zpci_bus_scan_device - Scan one zPCI function and hand it to the PCI core
 * @zdev: the zPCI function to scan
 *
 * The function is prepared first, then scanned as a single device on the PCI
 * bus of its zbus and finally added to the PCI core while holding the
 * rescan/remove lock.
 *
 * Return: 0 on success, the error from preparing the function, or -ENODEV
 * if scanning did not yield a PCI device
 */
int zpci_bus_scan_device(struct zpci_dev *zdev)
{
	struct pci_dev *scanned;
	int err;

	err = zpci_bus_prepare_device(zdev);
	if (err)
		return err;

	scanned = pci_scan_single_device(zdev->zbus->bus, zdev->devfn);
	if (!scanned)
		return -ENODEV;

	pci_lock_rescan_remove();
	pci_bus_add_device(scanned);
	pci_unlock_rescan_remove();

	return 0;
}
/* zpci_bus_remove_device - Take a zPCI function away from the PCI core
 * @zdev: the zPCI function to remove from the PCI core
 * @set_error: if true, mark the PCI device as permanently failed first
 *
 * Leaves the zPCI function configured but offline: it stays reachable via
 * its hotplug slot and the zPCI API, but it is no longer present on the
 * common code PCI bus and therefore no longer available to drivers.
 * Nothing is done if the zbus has no PCI bus or no PCI device exists for
 * the function's devfn.
 */
void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error)
{
	struct zpci_bus *zbus = zdev->zbus;
	struct pci_dev *pdev;

	if (!zbus->bus)
		return;

	pdev = pci_get_slot(zbus->bus, zdev->devfn);
	if (!pdev)
		return;

	if (set_error)
		pdev->error_state = pci_channel_io_perm_failure;

	/* Virtual functions are removed through the IOV helper instead */
	if (pdev->is_virtfn)
		zpci_iov_remove_virtfn(pdev, zdev->vfn);
	else
		pci_stop_and_remove_bus_device_locked(pdev);

	/* balance pci_get_slot */
	pci_dev_put(pdev);
}
/* zpci_bus_scan_bus - Scan every configured zPCI function of a zbus
 * @zbus: the zbus to scan
 *
 * Prepares each function of the bus that is in the configured state and then
 * scans the PCI bus and adds the found devices under the rescan/remove lock.
 * A function that cannot be prepared does not stop the scan; the remaining
 * functions are still attempted and the failure is reported at the end.
 *
 * Return: 0 on success, -EIO if at least one function failed to be prepared
 */
int zpci_bus_scan_bus(struct zpci_bus *zbus)
{
	struct zpci_dev *zdev;
	int devfn, ret = 0;

	for (devfn = 0; devfn < ZPCI_FUNCTIONS_PER_BUS; devfn++) {
		zdev = zbus->function[devfn];
		if (!zdev || zdev->state != ZPCI_FN_STATE_CONFIGURED)
			continue;

		/* Remember the failure but keep going with the other functions */
		if (zpci_bus_prepare_device(zdev))
			ret = -EIO;
	}

	pci_lock_rescan_remove();
	pci_scan_child_bus(zbus->bus);
	pci_bus_add_devices(zbus->bus);
	pci_unlock_rescan_remove();

	return ret;
}
s390/pci: separate zbus registration from scanning Now that the zbus can be created without being scanned we can go one step further and make registering a device to a zbus independent from scanning it. This way the zbus handling becomes much more natural in that functions can be registered on the zbus to be scanned later more closely resembling the handling of both real PCI hardware and other virtual PCI busses like Hyper-V's virtual PCI bus (see for example drivers/pci/controller/pci-hyperv.c:create_root_hv_pci_bus()). Having zbus registration separate from scanning allows us to return fully initialized but still disabled zdevs from zpci_create_device() which can then be configured just as we would configure a zdev from standby (minus the SCLP Configure already done by the platform). There is still the exception that a PCI function with non-zero devfn can be plugged before its PCI bus, which depends on the function with zero devfn, is created. In this case the zdev returned from zpci_create_device() is still missing its bus, hotplug slot, and resources which need to be created later but at least it doesn't wait in the enabled state and can otherwise be treated as initialized. With this we also separate the initial PCI scan using CLP List PCI Functions into two phases. In the CLP loop's callback we only register each function with a virtual zbus creating the latter as needed. Then, after we have built this virtual PCI topology based on our list of zbusses, we can make use of the common code functionality to scan each complete zbus as a separate child bus. Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com> Acked-by: Pierre Morel <pmorel@linux.ibm.com> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2021-02-12 11:57:58 +01:00
/* zpci_bus_scan_busses - Scan all registered busses
 *
 * Walks the list of zbusses under the list lock and scans each one. The
 * result of the individual bus scans is not evaluated.
 */
void zpci_bus_scan_busses(void)
{
	struct zpci_bus *cur;

	mutex_lock(&zbus_list_lock);
	list_for_each_entry(cur, &zbus_list, bus_next) {
		zpci_bus_scan_bus(cur);
		/* Offer to reschedule after each bus */
		cond_resched();
	}
	mutex_unlock(&zbus_list_lock);
}
/* zpci_bus_create_pci_bus - Create the PCI bus that belongs to a zbus
 * @zbus: the zbus holding the zPCI functions
 * @fr: PCI root function that determines the bus's domain and bus speed
 * @ops: the PCI operations to use for the new bus
 *
 * A domain is allocated from the UID of @fr, and the multifunction property
 * and maximum bus speed of the whole bus are taken over from @fr as well.
 * If the root bus cannot be created the domain is released again.
 *
 * Return: 0 on success, a negative value from the domain allocation, or
 * -EFAULT if the root bus could not be created
 */
static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, struct pci_ops *ops)
{
	struct pci_bus *root;
	int domain_nr;

	domain_nr = zpci_alloc_domain((u16)fr->uid);
	if (domain_nr < 0)
		return domain_nr;

	zbus->domain_nr = domain_nr;
	zbus->multifunction = fr->rid_available;
	zbus->max_bus_speed = fr->max_bus_speed;

	/*
	 * On success the entries of zbus->resources are handed over to the
	 * new root bus, leaving zbus->resources empty.
	 */
	root = pci_create_root_bus(NULL, ZPCI_BUS_NR, ops, zbus, &zbus->resources);
	if (!root) {
		zpci_free_domain(zbus->domain_nr);
		return -EFAULT;
	}

	zbus->bus = root;
	return 0;
}
/* zpci_bus_release - kref release callback freeing a zbus
 * @kref: the kref embedded in the zbus that dropped to zero
 *
 * If a PCI bus was created for the zbus, it is stopped and removed under the
 * rescan/remove lock, and the domain and any remaining resource list entries
 * are freed. The zbus is then unlinked from the zbus list and freed.
 */
static void zpci_bus_release(struct kref *kref)
{
	struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref);
	struct pci_bus *root = zbus->bus;

	if (root) {
		pci_lock_rescan_remove();
		pci_stop_root_bus(root);

		zpci_free_domain(zbus->domain_nr);
		pci_free_resource_list(&zbus->resources);

		pci_remove_root_bus(root);
		pci_unlock_rescan_remove();
	}

	mutex_lock(&zbus_list_lock);
	list_del(&zbus->bus_next);
	mutex_unlock(&zbus_list_lock);

	kfree(zbus);
}
/* zpci_bus_put - Drop a reference to a zbus
 * @zbus: the zbus whose reference is dropped
 *
 * The zbus is released via zpci_bus_release() once the last reference is gone.
 */
static void zpci_bus_put(struct zpci_bus *zbus)
{
	kref_put(&zbus->kref, zpci_bus_release);
}
static struct zpci_bus *zpci_bus_get(int pchid)
{
struct zpci_bus *zbus;
mutex_lock(&zbus_list_lock);
list_for_each_entry(zbus, &zbus_list, bus_next) {
if (pchid == zbus->pchid) {
kref_get(&zbus->kref);
goto out_unlock;
}
}
zbus = NULL;
out_unlock:
mutex_unlock(&zbus_list_lock);
return zbus;
}
/* zpci_bus_alloc - Allocate a new zbus and add it to the zbus list
 * @pchid: the PCHID stored in the zbus and matched by zpci_bus_get()
 *
 * The new zbus starts with a reference count of one and a resource list that
 * holds only its bus number resource covering 0 to ZPCI_BUS_NR. No PCI bus is
 * created here.
 *
 * Return: the new zbus, or NULL if the allocation failed
 */
static struct zpci_bus *zpci_bus_alloc(int pchid)
{
	struct zpci_bus *zbus;

	zbus = kzalloc(sizeof(*zbus), GFP_KERNEL);
	if (!zbus)
		return NULL;

	zbus->pchid = pchid;
	kref_init(&zbus->kref);

	INIT_LIST_HEAD(&zbus->resources);
	zbus->bus_resource.start = 0;
	zbus->bus_resource.end = ZPCI_BUS_NR;
	zbus->bus_resource.flags = IORESOURCE_BUS;
	pci_add_resource(&zbus->resources, &zbus->bus_resource);

	/*
	 * Publish the zbus only once it is fully initialized: as soon as it
	 * is on zbus_list, zpci_bus_get() may find it and take a reference,
	 * which must not happen before kref_init() has run.
	 */
	INIT_LIST_HEAD(&zbus->bus_next);
	mutex_lock(&zbus_list_lock);
	list_add_tail(&zbus->bus_next, &zbus_list);
	mutex_unlock(&zbus_list_lock);

	return zbus;
}
s390/pci: fix PF/VF linking on hot plug Currently there are four places in which a PCI function is scanned and made available to drivers: 1. In pci_scan_root_bus() as part of the initial zbus creation. 2. In zpci_bus_add_devices() when registering a device in configured state on a zbus that has already been scanned. 3. When a function is already known to zPCI (in reserved/standby state) and configuration is triggered through firmware by PEC 0x301. 4. When a device is already known to zPCI (in standby/reserved state) and configuration is triggered from within Linux using enable_slot(). The PF/VF linking step and setting of pdev->is_virtfn introduced with commit e5794cf1a270 ("s390/pci: create links between PFs and VFs") was only triggered for the second case, which is where VFs created through sriov_numvfs usually land. However unlike some other platforms but like POWER VFs can be individually enabled/disabled through /sys/bus/pci/slots. Fix this by doing VF setup as part of pcibios_bus_add_device() which is called in all of the above cases. Finally to remove the PF/VF links call the common code pci_iov_remove_virtfn() function to remove linked VFs. This takes care of the necessary sysfs cleanup. Fixes: e5794cf1a270 ("s390/pci: create links between PFs and VFs") Cc: <stable@vger.kernel.org> # 5.8: 2f0230b2f2d5: s390/pci: re-introduce zpci_remove_device() Cc: <stable@vger.kernel.org> # 5.8 Acked-by: Pierre Morel <pmorel@linux.ibm.com> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2020-08-03 17:58:10 +02:00
/* pcibios_bus_add_device - Architecture hook run when a PCI device is added
 * @pdev: the PCI device being added
 *
 * For a zPCI function with a non-zero virtual function number the PF/VF
 * linking is set up here, so that it happens on every path through which a
 * function gets added to the PCI core, including individual hotplug.
 */
void pcibios_bus_add_device(struct pci_dev *pdev)
{
	struct zpci_dev *zdev = to_zpci(pdev);

	/*
	 * With pdev->no_vf_scan the common PCI probing code does not
	 * perform PF/VF linking.
	 */
	if (zdev->vfn) {
		zpci_iov_setup_virtfn(zdev->zbus, pdev, zdev->vfn);
		pdev->no_command_memory = 1;
	}
}
/* zpci_bus_add_device - Attach a zPCI function to a zbus
 * @zbus: the zbus the function is attached to
 * @zdev: the zPCI function to attach
 *
 * Claims the devfn slot of @zdev on @zbus, counts the function and creates
 * its hotplug slot. If anything fails after the slot was claimed, the slot,
 * the zbus link of @zdev and the device count are restored.
 *
 * Return: 0 on success, -EINVAL if the devfn is already taken or a function
 * without rid_available is added to a multifunction zbus, otherwise the error
 * from zpci_init_slot()
 */
static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
{
	int rc;

	if (zbus->function[zdev->devfn]) {
		pr_err("devfn %04x is already assigned\n", zdev->devfn);
		return -EINVAL;
	}

	zdev->zbus = zbus;
	zbus->function[zdev->devfn] = zdev;
	zpci_nb_devices++;

	if (zbus->multifunction && !zdev->rid_available) {
		WARN_ONCE(1, "rid_available not set for multifunction\n");
		rc = -EINVAL;
		goto out_detach;
	}

	rc = zpci_init_slot(zdev);
	if (rc)
		goto out_detach;

	zdev->has_hp_slot = 1;
	return 0;

out_detach:
	/* Roll back the slot claim made above */
	zbus->function[zdev->devfn] = NULL;
	zdev->zbus = NULL;
	zpci_nb_devices--;
	return rc;
}
/* zpci_bus_device_register - Register a zPCI function with a zbus
 * @zdev: the zPCI function to register
 * @ops: the PCI operations used if a PCI bus has to be created
 *
 * An existing zbus with the same PCHID is reused when RIDs are in use and
 * available for @zdev; otherwise a new zbus is allocated. The PCI bus of the
 * zbus is created if it does not exist yet, and @zdev is then attached to the
 * zbus. On failure the zbus reference held by this function is dropped.
 *
 * Return: 0 on success, -ENOSPC if the device limit is reached, -EINVAL for
 * an out of range devfn, -ENOMEM if no zbus could be allocated, otherwise
 * the error from creating the PCI bus or attaching the function
 */
int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
{
	struct zpci_bus *zbus = NULL;
	int rc;

	if (zpci_nb_devices == ZPCI_NR_DEVICES) {
		pr_warn("Adding PCI function %08x failed because the configured limit of %d is reached\n",
			zdev->fid, ZPCI_NR_DEVICES);
		return -ENOSPC;
	}

	if (zdev->devfn >= ZPCI_FUNCTIONS_PER_BUS)
		return -EINVAL;

	/* Only functions with a usable RID may share an existing zbus */
	if (!s390_pci_no_rid && zdev->rid_available)
		zbus = zpci_bus_get(zdev->pchid);

	if (!zbus) {
		zbus = zpci_bus_alloc(zdev->pchid);
		if (!zbus)
			return -ENOMEM;
	}

	if (!zbus->bus) {
		/*
		 * The first PCI function registered with a zpci_bus provides
		 * the UID that becomes the domain number of that bus.
		 * Currently there is exactly one zpci_bus per domain.
		 */
		rc = zpci_bus_create_pci_bus(zbus, zdev, ops);
		if (rc)
			goto out_put;
	}

	rc = zpci_bus_add_device(zbus, zdev);
	if (!rc)
		return 0;

out_put:
	pr_err("Adding PCI function %08x failed\n", zdev->fid);
	zpci_bus_put(zbus);
	return rc;
}
/* zpci_bus_device_unregister - Detach a zPCI function from its zbus
 * @zdev: the zPCI function to unregister
 *
 * Clears the function's devfn slot on its zbus, lowers the device count and
 * drops the function's reference to the zbus.
 */
void zpci_bus_device_unregister(struct zpci_dev *zdev)
{
	struct zpci_bus *zbus = zdev->zbus;

	zbus->function[zdev->devfn] = NULL;
	zpci_nb_devices--;

	zpci_bus_put(zbus);
}