vfio: simplify iommu group allocation for mediated devices

Reuse the logic in vfio_noiommu_group_alloc to allocate a fake
single-device iommu group for mediated devices by factoring out a common
function, and replacing the noiommu boolean field in struct vfio_group
with an enum to distinguish the three different kinds of groups.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20210924155705.4258-8-hch@lst.de
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
This commit is contained in:
Authored by Christoph Hellwig on 2021-09-24 17:56:57 +02:00; committed by Alex Williamson.
parent c04ac34078
commit c68ea0d00a
8 changed files with 76 additions and 72 deletions

View File

@@ -351,7 +351,7 @@ static int vfio_ap_mdev_probe(struct mdev_device *mdev)
list_add(&matrix_mdev->node, &matrix_dev->mdev_list); list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
mutex_unlock(&matrix_dev->lock); mutex_unlock(&matrix_dev->lock);
ret = vfio_register_group_dev(&matrix_mdev->vdev); ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev);
if (ret) if (ret)
goto err_list; goto err_list;
dev_set_drvdata(&mdev->dev, matrix_mdev); dev_set_drvdata(&mdev->dev, matrix_mdev);

View File

@@ -13,60 +13,23 @@
#include "mdev_private.h" #include "mdev_private.h"
static int mdev_attach_iommu(struct mdev_device *mdev)
{
int ret;
struct iommu_group *group;
group = iommu_group_alloc();
if (IS_ERR(group))
return PTR_ERR(group);
ret = iommu_group_add_device(group, &mdev->dev);
if (!ret)
dev_info(&mdev->dev, "MDEV: group_id = %d\n",
iommu_group_id(group));
iommu_group_put(group);
return ret;
}
static void mdev_detach_iommu(struct mdev_device *mdev)
{
iommu_group_remove_device(&mdev->dev);
dev_info(&mdev->dev, "MDEV: detaching iommu\n");
}
static int mdev_probe(struct device *dev) static int mdev_probe(struct device *dev)
{ {
struct mdev_driver *drv = struct mdev_driver *drv =
container_of(dev->driver, struct mdev_driver, driver); container_of(dev->driver, struct mdev_driver, driver);
struct mdev_device *mdev = to_mdev_device(dev);
int ret;
ret = mdev_attach_iommu(mdev); if (!drv->probe)
if (ret) return 0;
return ret; return drv->probe(to_mdev_device(dev));
if (drv->probe) {
ret = drv->probe(mdev);
if (ret)
mdev_detach_iommu(mdev);
}
return ret;
} }
static void mdev_remove(struct device *dev) static void mdev_remove(struct device *dev)
{ {
struct mdev_driver *drv = struct mdev_driver *drv =
container_of(dev->driver, struct mdev_driver, driver); container_of(dev->driver, struct mdev_driver, driver);
struct mdev_device *mdev = to_mdev_device(dev);
if (drv->remove) if (drv->remove)
drv->remove(mdev); drv->remove(to_mdev_device(dev));
mdev_detach_iommu(mdev);
} }
static int mdev_match(struct device *dev, struct device_driver *drv) static int mdev_match(struct device *dev, struct device_driver *drv)

View File

@@ -119,7 +119,7 @@ static int vfio_mdev_probe(struct mdev_device *mdev)
return -ENOMEM; return -ENOMEM;
vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops); vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops);
ret = vfio_register_group_dev(vdev); ret = vfio_register_emulated_iommu_dev(vdev);
if (ret) if (ret)
goto out_uninit; goto out_uninit;

View File

@@ -67,6 +67,30 @@ struct vfio_unbound_dev {
struct list_head unbound_next; struct list_head unbound_next;
}; };
enum vfio_group_type {
/*
* Physical device with IOMMU backing.
*/
VFIO_IOMMU,
/*
* Virtual device without IOMMU backing. The VFIO core fakes up an
* iommu_group as the iommu_group sysfs interface is part of the
* userspace ABI. The user of these devices must not be able to
* directly trigger unmediated DMA.
*/
VFIO_EMULATED_IOMMU,
/*
* Physical device without IOMMU backing. The VFIO core fakes up an
* iommu_group as the iommu_group sysfs interface is part of the
* userspace ABI. Users can trigger unmediated DMA by the device,
* usage is highly dangerous, requires an explicit opt-in and will
* taint the kernel.
*/
VFIO_NO_IOMMU,
};
struct vfio_group { struct vfio_group {
struct kref kref; struct kref kref;
int minor; int minor;
@@ -83,7 +107,7 @@ struct vfio_group {
struct mutex unbound_lock; struct mutex unbound_lock;
atomic_t opened; atomic_t opened;
wait_queue_head_t container_q; wait_queue_head_t container_q;
bool noiommu; enum vfio_group_type type;
unsigned int dev_counter; unsigned int dev_counter;
struct kvm *kvm; struct kvm *kvm;
struct blocking_notifier_head notifier; struct blocking_notifier_head notifier;
@@ -336,7 +360,7 @@ static void vfio_group_unlock_and_free(struct vfio_group *group)
* Group objects - create, release, get, put, search * Group objects - create, release, get, put, search
*/ */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
bool noiommu) enum vfio_group_type type)
{ {
struct vfio_group *group, *tmp; struct vfio_group *group, *tmp;
struct device *dev; struct device *dev;
@@ -355,7 +379,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
atomic_set(&group->opened, 0); atomic_set(&group->opened, 0);
init_waitqueue_head(&group->container_q); init_waitqueue_head(&group->container_q);
group->iommu_group = iommu_group; group->iommu_group = iommu_group;
group->noiommu = noiommu; group->type = type;
BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
group->nb.notifier_call = vfio_iommu_group_notifier; group->nb.notifier_call = vfio_iommu_group_notifier;
@@ -391,8 +415,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
} }
dev = device_create(vfio.class, NULL, dev = device_create(vfio.class, NULL,
MKDEV(MAJOR(vfio.group_devt), minor), MKDEV(MAJOR(vfio.group_devt), minor), group, "%s%d",
group, "%s%d", group->noiommu ? "noiommu-" : "", group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
iommu_group_id(iommu_group)); iommu_group_id(iommu_group));
if (IS_ERR(dev)) { if (IS_ERR(dev)) {
vfio_free_group_minor(minor); vfio_free_group_minor(minor);
@@ -778,8 +802,8 @@ void vfio_uninit_group_dev(struct vfio_device *device)
} }
EXPORT_SYMBOL_GPL(vfio_uninit_group_dev); EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
#ifdef CONFIG_VFIO_NOIOMMU static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev) enum vfio_group_type type)
{ {
struct iommu_group *iommu_group; struct iommu_group *iommu_group;
struct vfio_group *group; struct vfio_group *group;
@@ -794,7 +818,7 @@ static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev)
if (ret) if (ret)
goto out_put_group; goto out_put_group;
group = vfio_create_group(iommu_group, true); group = vfio_create_group(iommu_group, type);
if (IS_ERR(group)) { if (IS_ERR(group)) {
ret = PTR_ERR(group); ret = PTR_ERR(group);
goto out_remove_device; goto out_remove_device;
@@ -808,7 +832,6 @@ out_put_group:
iommu_group_put(iommu_group); iommu_group_put(iommu_group);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
#endif
static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{ {
@@ -824,7 +847,7 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
* bus. Taint the kernel because we're about to give a DMA * bus. Taint the kernel because we're about to give a DMA
* capable device to a user without IOMMU protection. * capable device to a user without IOMMU protection.
*/ */
group = vfio_noiommu_group_alloc(dev); group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
if (!IS_ERR(group)) { if (!IS_ERR(group)) {
add_taint(TAINT_USER, LOCKDEP_STILL_OK); add_taint(TAINT_USER, LOCKDEP_STILL_OK);
dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
@@ -841,7 +864,7 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
goto out_put; goto out_put;
/* a newly created vfio_group keeps the reference. */ /* a newly created vfio_group keeps the reference. */
group = vfio_create_group(iommu_group, false); group = vfio_create_group(iommu_group, VFIO_IOMMU);
if (IS_ERR(group)) if (IS_ERR(group))
goto out_put; goto out_put;
return group; return group;
@@ -851,10 +874,13 @@ out_put:
return group; return group;
} }
int vfio_register_group_dev(struct vfio_device *device) static int __vfio_register_dev(struct vfio_device *device,
struct vfio_group *group)
{ {
struct vfio_device *existing_device; struct vfio_device *existing_device;
struct vfio_group *group;
if (IS_ERR(group))
return PTR_ERR(group);
/* /*
* If the driver doesn't specify a set then the device is added to a * If the driver doesn't specify a set then the device is added to a
@@ -863,16 +889,13 @@ int vfio_register_group_dev(struct vfio_device *device)
if (!device->dev_set) if (!device->dev_set)
vfio_assign_device_set(device, device); vfio_assign_device_set(device, device);
group = vfio_group_find_or_alloc(device->dev);
if (IS_ERR(group))
return PTR_ERR(group);
existing_device = vfio_group_get_device(group, device->dev); existing_device = vfio_group_get_device(group, device->dev);
if (existing_device) { if (existing_device) {
dev_WARN(device->dev, "Device already exists on group %d\n", dev_WARN(device->dev, "Device already exists on group %d\n",
iommu_group_id(group->iommu_group)); iommu_group_id(group->iommu_group));
vfio_device_put(existing_device); vfio_device_put(existing_device);
if (group->noiommu) if (group->type == VFIO_NO_IOMMU ||
group->type == VFIO_EMULATED_IOMMU)
iommu_group_remove_device(device->dev); iommu_group_remove_device(device->dev);
vfio_group_put(group); vfio_group_put(group);
return -EBUSY; return -EBUSY;
@@ -891,8 +914,25 @@ int vfio_register_group_dev(struct vfio_device *device)
return 0; return 0;
} }
int vfio_register_group_dev(struct vfio_device *device)
{
return __vfio_register_dev(device,
vfio_group_find_or_alloc(device->dev));
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev); EXPORT_SYMBOL_GPL(vfio_register_group_dev);
/*
* Register a virtual device without IOMMU backing. The user of this
* device must not be able to directly trigger unmediated DMA.
*/
int vfio_register_emulated_iommu_dev(struct vfio_device *device)
{
return __vfio_register_dev(device,
vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
}
EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
/** /**
* Get a reference to the vfio_device for a device. Even if the * Get a reference to the vfio_device for a device. Even if the
* caller thinks they own the device, they could be racing with a * caller thinks they own the device, they could be racing with a
@@ -1019,7 +1059,7 @@ void vfio_unregister_group_dev(struct vfio_device *device)
if (list_empty(&group->device_list)) if (list_empty(&group->device_list))
wait_event(group->container_q, !group->container); wait_event(group->container_q, !group->container);
if (group->noiommu) if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
iommu_group_remove_device(device->dev); iommu_group_remove_device(device->dev);
/* Matches the get in vfio_register_group_dev() */ /* Matches the get in vfio_register_group_dev() */
@@ -1368,7 +1408,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
if (atomic_read(&group->container_users)) if (atomic_read(&group->container_users))
return -EINVAL; return -EINVAL;
if (group->noiommu && !capable(CAP_SYS_RAWIO)) if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
return -EPERM; return -EPERM;
f = fdget(container_fd); f = fdget(container_fd);
@@ -1388,7 +1428,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
/* Real groups and fake groups cannot mix */ /* Real groups and fake groups cannot mix */
if (!list_empty(&container->group_list) && if (!list_empty(&container->group_list) &&
container->noiommu != group->noiommu) { container->noiommu != (group->type == VFIO_NO_IOMMU)) {
ret = -EPERM; ret = -EPERM;
goto unlock_out; goto unlock_out;
} }
@@ -1402,7 +1442,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
} }
group->container = container; group->container = container;
container->noiommu = group->noiommu; container->noiommu = (group->type == VFIO_NO_IOMMU);
list_add(&group->container_next, &container->group_list); list_add(&group->container_next, &container->group_list);
/* Get a reference on the container and mark a user within the group */ /* Get a reference on the container and mark a user within the group */
@@ -1426,7 +1466,7 @@ static int vfio_group_add_container_user(struct vfio_group *group)
if (!atomic_inc_not_zero(&group->container_users)) if (!atomic_inc_not_zero(&group->container_users))
return -EINVAL; return -EINVAL;
if (group->noiommu) { if (group->type == VFIO_NO_IOMMU) {
atomic_dec(&group->container_users); atomic_dec(&group->container_users);
return -EPERM; return -EPERM;
} }
@@ -1451,7 +1491,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
!group->container->iommu_driver || !vfio_group_viable(group)) !group->container->iommu_driver || !vfio_group_viable(group))
return -EINVAL; return -EINVAL;
if (group->noiommu && !capable(CAP_SYS_RAWIO)) if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
return -EPERM; return -EPERM;
device = vfio_device_get_from_name(group, buf); device = vfio_device_get_from_name(group, buf);
@@ -1498,7 +1538,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
fd_install(fdno, filep); fd_install(fdno, filep);
if (group->noiommu) if (group->type == VFIO_NO_IOMMU)
dev_warn(device->dev, "vfio-noiommu device opened by user " dev_warn(device->dev, "vfio-noiommu device opened by user "
"(%s:%d)\n", current->comm, task_pid_nr(current)); "(%s:%d)\n", current->comm, task_pid_nr(current));
return fdno; return fdno;
@@ -1594,7 +1634,7 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
if (!group) if (!group)
return -ENODEV; return -ENODEV;
if (group->noiommu && !capable(CAP_SYS_RAWIO)) { if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
vfio_group_put(group); vfio_group_put(group);
return -EPERM; return -EPERM;
} }

View File

@@ -75,6 +75,7 @@ void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
const struct vfio_device_ops *ops); const struct vfio_device_ops *ops);
void vfio_uninit_group_dev(struct vfio_device *device); void vfio_uninit_group_dev(struct vfio_device *device);
int vfio_register_group_dev(struct vfio_device *device); int vfio_register_group_dev(struct vfio_device *device);
int vfio_register_emulated_iommu_dev(struct vfio_device *device);
void vfio_unregister_group_dev(struct vfio_device *device); void vfio_unregister_group_dev(struct vfio_device *device);
extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
extern void vfio_device_put(struct vfio_device *device); extern void vfio_device_put(struct vfio_device *device);

View File

@@ -553,7 +553,7 @@ static int mbochs_probe(struct mdev_device *mdev)
mbochs_create_config_space(mdev_state); mbochs_create_config_space(mdev_state);
mbochs_reset(mdev_state); mbochs_reset(mdev_state);
ret = vfio_register_group_dev(&mdev_state->vdev); ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
if (ret) if (ret)
goto err_mem; goto err_mem;
dev_set_drvdata(&mdev->dev, mdev_state); dev_set_drvdata(&mdev->dev, mdev_state);

View File

@@ -258,7 +258,7 @@ static int mdpy_probe(struct mdev_device *mdev)
mdpy_count++; mdpy_count++;
ret = vfio_register_group_dev(&mdev_state->vdev); ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
if (ret) if (ret)
goto err_mem; goto err_mem;
dev_set_drvdata(&mdev->dev, mdev_state); dev_set_drvdata(&mdev->dev, mdev_state);

View File

@@ -741,7 +741,7 @@ static int mtty_probe(struct mdev_device *mdev)
mtty_create_config_space(mdev_state); mtty_create_config_space(mdev_state);
ret = vfio_register_group_dev(&mdev_state->vdev); ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
if (ret) if (ret)
goto err_vconfig; goto err_vconfig;
dev_set_drvdata(&mdev->dev, mdev_state); dev_set_drvdata(&mdev->dev, mdev_state);