vfio/mdev: Check globally for duplicate devices

When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus.  We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.

Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.

Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device.  This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers.  Vendor drivers can trivially provide this
serialization internally if necessary.  Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs.  If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error.  While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition.  Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error.  Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error.  Now a user
would see -EAGAIN while the device is in this transitory state.

Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
This commit is contained in:
Alex Williamson 2018-05-15 13:53:55 -06:00
parent d59502bc05
commit 002fe996f6
3 changed files with 43 additions and 68 deletions

View File

@ -145,6 +145,11 @@ The functions in the mdev_parent_ops structure are as follows:
* create: allocate basic resources in a driver for a mediated device * create: allocate basic resources in a driver for a mediated device
* remove: free resources in a driver when a mediated device is destroyed * remove: free resources in a driver when a mediated device is destroyed
(Note that mdev-core provides no implicit serialization of create/remove
callbacks per mdev parent device, per mdev type, or any other categorization.
Vendor drivers are expected to be fully asynchronous in this respect or
provide their own internal resource protection.)
The callbacks in the mdev_parent_ops structure are as follows: The callbacks in the mdev_parent_ops structure are as follows:
* open: open callback of mediated device * open: open callback of mediated device

View File

@ -66,34 +66,6 @@ uuid_le mdev_uuid(struct mdev_device *mdev)
} }
EXPORT_SYMBOL(mdev_uuid); EXPORT_SYMBOL(mdev_uuid);
static int _find_mdev_device(struct device *dev, void *data)
{
struct mdev_device *mdev;
if (!dev_is_mdev(dev))
return 0;
mdev = to_mdev_device(dev);
if (uuid_le_cmp(mdev->uuid, *(uuid_le *)data) == 0)
return 1;
return 0;
}
static bool mdev_device_exist(struct mdev_parent *parent, uuid_le uuid)
{
struct device *dev;
dev = device_find_child(parent->dev, &uuid, _find_mdev_device);
if (dev) {
put_device(dev);
return true;
}
return false;
}
/* Should be called holding parent_list_lock */ /* Should be called holding parent_list_lock */
static struct mdev_parent *__find_parent_device(struct device *dev) static struct mdev_parent *__find_parent_device(struct device *dev)
{ {
@ -221,7 +193,6 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops)
} }
kref_init(&parent->ref); kref_init(&parent->ref);
mutex_init(&parent->lock);
parent->dev = dev; parent->dev = dev;
parent->ops = ops; parent->ops = ops;
@ -297,6 +268,10 @@ static void mdev_device_release(struct device *dev)
{ {
struct mdev_device *mdev = to_mdev_device(dev); struct mdev_device *mdev = to_mdev_device(dev);
mutex_lock(&mdev_list_lock);
list_del(&mdev->next);
mutex_unlock(&mdev_list_lock);
dev_dbg(&mdev->dev, "MDEV: destroying\n"); dev_dbg(&mdev->dev, "MDEV: destroying\n");
kfree(mdev); kfree(mdev);
} }
@ -304,7 +279,7 @@ static void mdev_device_release(struct device *dev)
int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
{ {
int ret; int ret;
struct mdev_device *mdev; struct mdev_device *mdev, *tmp;
struct mdev_parent *parent; struct mdev_parent *parent;
struct mdev_type *type = to_mdev_type(kobj); struct mdev_type *type = to_mdev_type(kobj);
@ -312,21 +287,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
if (!parent) if (!parent)
return -EINVAL; return -EINVAL;
mutex_lock(&parent->lock); mutex_lock(&mdev_list_lock);
/* Check for duplicate */ /* Check for duplicate */
if (mdev_device_exist(parent, uuid)) { list_for_each_entry(tmp, &mdev_list, next) {
if (!uuid_le_cmp(tmp->uuid, uuid)) {
mutex_unlock(&mdev_list_lock);
ret = -EEXIST; ret = -EEXIST;
goto create_err; goto mdev_fail;
}
} }
mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
if (!mdev) { if (!mdev) {
mutex_unlock(&mdev_list_lock);
ret = -ENOMEM; ret = -ENOMEM;
goto create_err; goto mdev_fail;
} }
memcpy(&mdev->uuid, &uuid, sizeof(uuid_le)); memcpy(&mdev->uuid, &uuid, sizeof(uuid_le));
list_add(&mdev->next, &mdev_list);
mutex_unlock(&mdev_list_lock);
mdev->parent = parent; mdev->parent = parent;
kref_init(&mdev->ref); kref_init(&mdev->ref);
@ -338,35 +320,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
ret = device_register(&mdev->dev); ret = device_register(&mdev->dev);
if (ret) { if (ret) {
put_device(&mdev->dev); put_device(&mdev->dev);
goto create_err; goto mdev_fail;
} }
ret = mdev_device_create_ops(kobj, mdev); ret = mdev_device_create_ops(kobj, mdev);
if (ret) if (ret)
goto create_failed; goto create_fail;
ret = mdev_create_sysfs_files(&mdev->dev, type); ret = mdev_create_sysfs_files(&mdev->dev, type);
if (ret) { if (ret) {
mdev_device_remove_ops(mdev, true); mdev_device_remove_ops(mdev, true);
goto create_failed; goto create_fail;
} }
mdev->type_kobj = kobj; mdev->type_kobj = kobj;
mdev->active = true;
dev_dbg(&mdev->dev, "MDEV: created\n"); dev_dbg(&mdev->dev, "MDEV: created\n");
mutex_unlock(&parent->lock); return 0;
mutex_lock(&mdev_list_lock); create_fail:
list_add(&mdev->next, &mdev_list);
mutex_unlock(&mdev_list_lock);
return ret;
create_failed:
device_unregister(&mdev->dev); device_unregister(&mdev->dev);
mdev_fail:
create_err:
mutex_unlock(&parent->lock);
mdev_put_parent(parent); mdev_put_parent(parent);
return ret; return ret;
} }
@ -377,44 +352,39 @@ int mdev_device_remove(struct device *dev, bool force_remove)
struct mdev_parent *parent; struct mdev_parent *parent;
struct mdev_type *type; struct mdev_type *type;
int ret; int ret;
bool found = false;
mdev = to_mdev_device(dev); mdev = to_mdev_device(dev);
mutex_lock(&mdev_list_lock); mutex_lock(&mdev_list_lock);
list_for_each_entry(tmp, &mdev_list, next) { list_for_each_entry(tmp, &mdev_list, next) {
if (tmp == mdev) { if (tmp == mdev)
found = true;
break; break;
} }
if (tmp != mdev) {
mutex_unlock(&mdev_list_lock);
return -ENODEV;
} }
if (found) if (!mdev->active) {
list_del(&mdev->next);
mutex_unlock(&mdev_list_lock); mutex_unlock(&mdev_list_lock);
return -EAGAIN;
}
if (!found) mdev->active = false;
return -ENODEV; mutex_unlock(&mdev_list_lock);
type = to_mdev_type(mdev->type_kobj); type = to_mdev_type(mdev->type_kobj);
parent = mdev->parent; parent = mdev->parent;
mutex_lock(&parent->lock);
ret = mdev_device_remove_ops(mdev, force_remove); ret = mdev_device_remove_ops(mdev, force_remove);
if (ret) { if (ret) {
mutex_unlock(&parent->lock); mdev->active = true;
mutex_lock(&mdev_list_lock);
list_add(&mdev->next, &mdev_list);
mutex_unlock(&mdev_list_lock);
return ret; return ret;
} }
mdev_remove_sysfs_files(dev, type); mdev_remove_sysfs_files(dev, type);
device_unregister(dev); device_unregister(dev);
mutex_unlock(&parent->lock);
mdev_put_parent(parent); mdev_put_parent(parent);
return 0; return 0;

View File

@ -20,7 +20,6 @@ struct mdev_parent {
struct device *dev; struct device *dev;
const struct mdev_parent_ops *ops; const struct mdev_parent_ops *ops;
struct kref ref; struct kref ref;
struct mutex lock;
struct list_head next; struct list_head next;
struct kset *mdev_types_kset; struct kset *mdev_types_kset;
struct list_head type_list; struct list_head type_list;
@ -34,6 +33,7 @@ struct mdev_device {
struct kref ref; struct kref ref;
struct list_head next; struct list_head next;
struct kobject *type_kobj; struct kobject *type_kobj;
bool active;
}; };
#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev) #define to_mdev_device(dev) container_of(dev, struct mdev_device, dev)