2022-09-22 16:20:26 -03:00
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *
 * VFIO container (/dev/vfio/vfio)
 */
# include <linux/file.h>
# include <linux/slab.h>
# include <linux/fs.h>
# include <linux/capability.h>
# include <linux/iommu.h>
# include <linux/miscdevice.h>
# include <linux/vfio.h>
# include <uapi/linux/vfio.h>
# include "vfio.h"
/*
 * Per-open state of the container character device.  One instance is
 * allocated in vfio_fops_open() and freed by vfio_container_release()
 * once the last kref is dropped.
 */
struct vfio_container {
	/* Reference count; the final put frees the container */
	struct kref			kref;
	/* Groups attached to this container, linked via container_next */
	struct list_head		group_list;
	/* Taken around accesses to group_list and the iommu backend fields */
	struct rw_semaphore		group_lock;
	/* Backend chosen by VFIO_SET_IOMMU, NULL while none is set */
	struct vfio_iommu_driver	*iommu_driver;
	/* Value returned by the backend's open() callback */
	void				*iommu_data;
	/* True when the attached groups are of type VFIO_NO_IOMMU */
	bool				noiommu;
};
/* File-global registry of the IOMMU backend drivers known to VFIO. */
static struct vfio {
	/* Registered drivers, linked through vfio_iommu_driver.vfio_next */
	struct list_head	iommu_drivers_list;
	/* Held while walking or modifying iommu_drivers_list */
	struct mutex		iommu_drivers_lock;
} vfio;
static void * vfio_noiommu_open ( unsigned long arg )
{
if ( arg ! = VFIO_NOIOMMU_IOMMU )
return ERR_PTR ( - EINVAL ) ;
if ( ! capable ( CAP_SYS_RAWIO ) )
return ERR_PTR ( - EPERM ) ;
return NULL ;
}
/* release() callback of the vfio-noiommu backend; intentionally a no-op. */
static void vfio_noiommu_release(void *iommu_data)
{
	/* vfio_noiommu_open() hands out NULL, so there is nothing to free. */
}
static long vfio_noiommu_ioctl ( void * iommu_data ,
unsigned int cmd , unsigned long arg )
{
if ( cmd = = VFIO_CHECK_EXTENSION )
return vfio_noiommu & & ( arg = = VFIO_NOIOMMU_IOMMU ) ? 1 : 0 ;
return - ENOTTY ;
}
/* attach_group() callback of the vfio-noiommu backend; always succeeds. */
static int vfio_noiommu_attach_group(void *iommu_data,
				     struct iommu_group *iommu_group,
				     enum vfio_group_type type)
{
	/* No per-group state is kept, so there is nothing that can fail. */
	return 0;
}
/* detach_group() callback of the vfio-noiommu backend; intentionally a no-op. */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
	/* The matching attach callback does no work, so neither does this. */
}
/*
 * Callback table of the vfio-noiommu backend.  The optional callbacks
 * (register_device, pin_pages, dma_rw, ...) are left unset.
 */
static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name		= " vfio-noiommu ",
	.owner		= THIS_MODULE,
	.open		= vfio_noiommu_open,
	.release	= vfio_noiommu_release,
	.ioctl		= vfio_noiommu_ioctl,
	.attach_group	= vfio_noiommu_attach_group,
	.detach_group	= vfio_noiommu_detach_group,
};
/*
 * Only noiommu containers can use vfio-noiommu and noiommu containers can only
 * use vfio-noiommu.
 */
static bool vfio_iommu_driver_allowed ( struct vfio_container * container ,
const struct vfio_iommu_driver * driver )
{
if ( ! IS_ENABLED ( CONFIG_VFIO_NOIOMMU ) )
return true ;
return container - > noiommu = = ( driver - > ops = = & vfio_noiommu_ops ) ;
}
/*
* IOMMU driver registration
*/
int vfio_register_iommu_driver ( const struct vfio_iommu_driver_ops * ops )
{
struct vfio_iommu_driver * driver , * tmp ;
if ( WARN_ON ( ! ops - > register_device ! = ! ops - > unregister_device ) )
return - EINVAL ;
driver = kzalloc ( sizeof ( * driver ) , GFP_KERNEL ) ;
if ( ! driver )
return - ENOMEM ;
driver - > ops = ops ;
mutex_lock ( & vfio . iommu_drivers_lock ) ;
/* Check for duplicates */
list_for_each_entry ( tmp , & vfio . iommu_drivers_list , vfio_next ) {
if ( tmp - > ops = = ops ) {
mutex_unlock ( & vfio . iommu_drivers_lock ) ;
kfree ( driver ) ;
return - EINVAL ;
}
}
list_add ( & driver - > vfio_next , & vfio . iommu_drivers_list ) ;
mutex_unlock ( & vfio . iommu_drivers_lock ) ;
return 0 ;
}
EXPORT_SYMBOL_GPL ( vfio_register_iommu_driver ) ;
/*
 * Remove the backend registered with @ops from the driver list and free
 * its list entry.  Does nothing if @ops was never registered.
 */
void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *cur, *found = NULL;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(cur, &vfio.iommu_drivers_list, vfio_next) {
		if (cur->ops != ops)
			continue;

		/* Unlink and stop walking; the entry is freed after unlock. */
		list_del(&cur->vfio_next);
		found = cur;
		break;
	}
	mutex_unlock(&vfio.iommu_drivers_lock);

	kfree(found);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
/*
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
/* kref release callback: frees the container that embeds @kref. */
static void vfio_container_release(struct kref *kref)
{
	kfree(container_of(kref, struct vfio_container, kref));
}
static void vfio_container_get ( struct vfio_container * container )
{
kref_get ( & container - > kref ) ;
}
static void vfio_container_put ( struct vfio_container * container )
{
kref_put ( & container - > kref , vfio_container_release ) ;
}
void vfio_device_container_register ( struct vfio_device * device )
{
struct vfio_iommu_driver * iommu_driver =
device - > group - > container - > iommu_driver ;
if ( iommu_driver & & iommu_driver - > ops - > register_device )
iommu_driver - > ops - > register_device (
device - > group - > container - > iommu_data , device ) ;
}
void vfio_device_container_unregister ( struct vfio_device * device )
{
struct vfio_iommu_driver * iommu_driver =
device - > group - > container - > iommu_driver ;
if ( iommu_driver & & iommu_driver - > ops - > unregister_device )
iommu_driver - > ops - > unregister_device (
device - > group - > container - > iommu_data , device ) ;
}
2022-11-29 16:31:49 -04:00
static long
vfio_container_ioctl_check_extension ( struct vfio_container * container ,
unsigned long arg )
2022-09-22 16:20:26 -03:00
{
struct vfio_iommu_driver * driver ;
long ret = 0 ;
down_read ( & container - > group_lock ) ;
driver = container - > iommu_driver ;
switch ( arg ) {
/* No base extensions yet */
default :
/*
* If no driver is set , poll all registered drivers for
* extensions and return the first positive result . If
* a driver is already set , further queries will be passed
* only to that driver .
*/
if ( ! driver ) {
mutex_lock ( & vfio . iommu_drivers_lock ) ;
list_for_each_entry ( driver , & vfio . iommu_drivers_list ,
vfio_next ) {
if ( ! list_empty ( & container - > group_list ) & &
! vfio_iommu_driver_allowed ( container ,
driver ) )
continue ;
if ( ! try_module_get ( driver - > ops - > owner ) )
continue ;
ret = driver - > ops - > ioctl ( NULL ,
VFIO_CHECK_EXTENSION ,
arg ) ;
module_put ( driver - > ops - > owner ) ;
if ( ret > 0 )
break ;
}
mutex_unlock ( & vfio . iommu_drivers_lock ) ;
} else
ret = driver - > ops - > ioctl ( container - > iommu_data ,
VFIO_CHECK_EXTENSION , arg ) ;
}
up_read ( & container - > group_lock ) ;
return ret ;
}
/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups ( struct vfio_container * container ,
struct vfio_iommu_driver * driver ,
void * data )
{
struct vfio_group * group ;
int ret = - ENODEV ;
list_for_each_entry ( group , & container - > group_list , container_next ) {
ret = driver - > ops - > attach_group ( data , group - > iommu_group ,
group - > type ) ;
if ( ret )
goto unwind ;
}
return ret ;
unwind :
list_for_each_entry_continue_reverse ( group , & container - > group_list ,
container_next ) {
driver - > ops - > detach_group ( data , group - > iommu_group ) ;
}
return ret ;
}
static long vfio_ioctl_set_iommu ( struct vfio_container * container ,
unsigned long arg )
{
struct vfio_iommu_driver * driver ;
long ret = - ENODEV ;
down_write ( & container - > group_lock ) ;
/*
* The container is designed to be an unprivileged interface while
* the group can be assigned to specific users . Therefore , only by
* adding a group to a container does the user get the privilege of
* enabling the iommu , which may allocate finite resources . There
* is no unset_iommu , but by removing all the groups from a container ,
* the container is deprivileged and returns to an unset state .
*/
if ( list_empty ( & container - > group_list ) | | container - > iommu_driver ) {
up_write ( & container - > group_lock ) ;
return - EINVAL ;
}
mutex_lock ( & vfio . iommu_drivers_lock ) ;
list_for_each_entry ( driver , & vfio . iommu_drivers_list , vfio_next ) {
void * data ;
if ( ! vfio_iommu_driver_allowed ( container , driver ) )
continue ;
if ( ! try_module_get ( driver - > ops - > owner ) )
continue ;
/*
* The arg magic for SET_IOMMU is the same as CHECK_EXTENSION ,
* so test which iommu driver reported support for this
* extension and call open on them . We also pass them the
* magic , allowing a single driver to support multiple
* interfaces if they ' d like .
*/
if ( driver - > ops - > ioctl ( NULL , VFIO_CHECK_EXTENSION , arg ) < = 0 ) {
module_put ( driver - > ops - > owner ) ;
continue ;
}
data = driver - > ops - > open ( arg ) ;
if ( IS_ERR ( data ) ) {
ret = PTR_ERR ( data ) ;
module_put ( driver - > ops - > owner ) ;
continue ;
}
ret = __vfio_container_attach_groups ( container , driver , data ) ;
if ( ret ) {
driver - > ops - > release ( data ) ;
module_put ( driver - > ops - > owner ) ;
continue ;
}
container - > iommu_driver = driver ;
container - > iommu_data = data ;
break ;
}
mutex_unlock ( & vfio . iommu_drivers_lock ) ;
up_write ( & container - > group_lock ) ;
return ret ;
}
static long vfio_fops_unl_ioctl ( struct file * filep ,
unsigned int cmd , unsigned long arg )
{
struct vfio_container * container = filep - > private_data ;
struct vfio_iommu_driver * driver ;
void * data ;
long ret = - EINVAL ;
if ( ! container )
return ret ;
switch ( cmd ) {
case VFIO_GET_API_VERSION :
ret = VFIO_API_VERSION ;
break ;
case VFIO_CHECK_EXTENSION :
ret = vfio_container_ioctl_check_extension ( container , arg ) ;
break ;
case VFIO_SET_IOMMU :
ret = vfio_ioctl_set_iommu ( container , arg ) ;
break ;
default :
driver = container - > iommu_driver ;
data = container - > iommu_data ;
if ( driver ) /* passthrough all unrecognized ioctls */
ret = driver - > ops - > ioctl ( data , cmd , arg ) ;
}
return ret ;
}
static int vfio_fops_open ( struct inode * inode , struct file * filep )
{
struct vfio_container * container ;
2023-01-08 17:44:24 +02:00
container = kzalloc ( sizeof ( * container ) , GFP_KERNEL_ACCOUNT ) ;
2022-09-22 16:20:26 -03:00
if ( ! container )
return - ENOMEM ;
INIT_LIST_HEAD ( & container - > group_list ) ;
init_rwsem ( & container - > group_lock ) ;
kref_init ( & container - > kref ) ;
filep - > private_data = container ;
return 0 ;
}
static int vfio_fops_release ( struct inode * inode , struct file * filep )
{
struct vfio_container * container = filep - > private_data ;
filep - > private_data = NULL ;
vfio_container_put ( container ) ;
return 0 ;
}
/* File operations of the container character device. */
static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};
struct vfio_container * vfio_container_from_file ( struct file * file )
{
struct vfio_container * container ;
/* Sanity check, is this really our fd? */
if ( file - > f_op ! = & vfio_fops )
return NULL ;
container = file - > private_data ;
WARN_ON ( ! container ) ; /* fget ensures we don't race vfio_release */
return container ;
}
/* Misc device description registered by vfio_container_init(). */
static struct miscdevice vfio_dev = {
	.minor		= VFIO_MINOR,
	.name		= " vfio ",
	.fops		= &vfio_fops,
	.nodename	= " vfio/vfio ",
	/* Read and write permission for user, group and other */
	.mode		= S_IRUGO | S_IWUGO,
};
int vfio_container_attach_group ( struct vfio_container * container ,
struct vfio_group * group )
{
struct vfio_iommu_driver * driver ;
int ret = 0 ;
2022-09-29 11:59:25 -03:00
lockdep_assert_held ( & group - > group_lock ) ;
2022-09-22 16:20:26 -03:00
if ( group - > type = = VFIO_NO_IOMMU & & ! capable ( CAP_SYS_RAWIO ) )
return - EPERM ;
down_write ( & container - > group_lock ) ;
/* Real groups and fake groups cannot mix */
if ( ! list_empty ( & container - > group_list ) & &
container - > noiommu ! = ( group - > type = = VFIO_NO_IOMMU ) ) {
ret = - EPERM ;
goto out_unlock_container ;
}
if ( group - > type = = VFIO_IOMMU ) {
ret = iommu_group_claim_dma_owner ( group - > iommu_group , group ) ;
if ( ret )
goto out_unlock_container ;
}
driver = container - > iommu_driver ;
if ( driver ) {
ret = driver - > ops - > attach_group ( container - > iommu_data ,
group - > iommu_group ,
group - > type ) ;
if ( ret ) {
if ( group - > type = = VFIO_IOMMU )
iommu_group_release_dma_owner (
group - > iommu_group ) ;
goto out_unlock_container ;
}
}
group - > container = container ;
group - > container_users = 1 ;
container - > noiommu = ( group - > type = = VFIO_NO_IOMMU ) ;
list_add ( & group - > container_next , & container - > group_list ) ;
/* Get a reference on the container and mark a user within the group */
vfio_container_get ( container ) ;
out_unlock_container :
up_write ( & container - > group_lock ) ;
return ret ;
}
void vfio_group_detach_container ( struct vfio_group * group )
{
struct vfio_container * container = group - > container ;
struct vfio_iommu_driver * driver ;
2022-09-29 11:59:25 -03:00
lockdep_assert_held ( & group - > group_lock ) ;
2022-09-22 16:20:26 -03:00
WARN_ON ( group - > container_users ! = 1 ) ;
down_write ( & container - > group_lock ) ;
driver = container - > iommu_driver ;
if ( driver )
driver - > ops - > detach_group ( container - > iommu_data ,
group - > iommu_group ) ;
if ( group - > type = = VFIO_IOMMU )
iommu_group_release_dma_owner ( group - > iommu_group ) ;
group - > container = NULL ;
group - > container_users = 0 ;
list_del ( & group - > container_next ) ;
/* Detaching the last group deprivileges a container, remove iommu */
if ( driver & & list_empty ( & container - > group_list ) ) {
driver - > ops - > release ( container - > iommu_data ) ;
module_put ( driver - > ops - > owner ) ;
container - > iommu_driver = NULL ;
container - > iommu_data = NULL ;
}
up_write ( & container - > group_lock ) ;
vfio_container_put ( container ) ;
}
2022-11-29 16:31:48 -04:00
int vfio_group_use_container ( struct vfio_group * group )
2022-09-22 16:20:26 -03:00
{
2022-09-29 11:59:25 -03:00
lockdep_assert_held ( & group - > group_lock ) ;
2022-09-22 16:20:26 -03:00
2022-11-29 16:31:50 -04:00
/*
* The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
* VFIO_SET_IOMMU hasn ' t been done yet .
*/
if ( ! group - > container - > iommu_driver )
2022-09-22 16:20:26 -03:00
return - EINVAL ;
if ( group - > type = = VFIO_NO_IOMMU & & ! capable ( CAP_SYS_RAWIO ) )
return - EPERM ;
get_file ( group - > opened_file ) ;
group - > container_users + + ;
return 0 ;
}
2022-11-29 16:31:48 -04:00
void vfio_group_unuse_container ( struct vfio_group * group )
2022-09-22 16:20:26 -03:00
{
2022-11-29 16:31:48 -04:00
lockdep_assert_held ( & group - > group_lock ) ;
2022-11-29 16:31:47 -04:00
2022-11-29 16:31:48 -04:00
WARN_ON ( group - > container_users < = 1 ) ;
group - > container_users - - ;
fput ( group - > opened_file ) ;
2022-09-22 16:20:26 -03:00
}
2022-11-10 18:57:01 -08:00
/*
 * Forward a pin request for @device to its container's IOMMU backend.
 *
 * Returns -E2BIG when @npage exceeds VFIO_PIN_PAGES_MAX_ENTRIES, -ENOTTY
 * when no backend is set or the backend has no pin_pages callback, and
 * otherwise whatever the backend's pin_pages() returns.
 */
int vfio_device_container_pin_pages(struct vfio_device *device,
				    dma_addr_t iova, int npage,
				    int prot, struct page **pages)
{
	struct vfio_container *container = device->group->container;
	struct iommu_group *grp = device->group->iommu_group;
	struct vfio_iommu_driver *drv = container->iommu_driver;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	if (unlikely(!drv))
		return -ENOTTY;
	if (unlikely(!drv->ops->pin_pages))
		return -ENOTTY;

	return drv->ops->pin_pages(container->iommu_data, grp, iova,
				   npage, prot, pages);
}
2022-11-10 18:57:01 -08:00
/*
 * Forward an unpin request for @device to its container's IOMMU backend.
 *
 * @iova:  start address that was passed to the pin call.
 * @npage: number of pages to unpin; must be in
 *         1..VFIO_PIN_PAGES_MAX_ENTRIES, otherwise a WARN fires and nothing
 *         is unpinned.
 *
 * A WARN also fires, and nothing is done, when the container has no
 * backend or the backend lacks an unpin_pages callback.  That mirrors the
 * checks in vfio_device_container_pin_pages(): pinning would have failed
 * with -ENOTTY in that situation, so there is nothing to unpin and the
 * missing callback must not be called.
 */
void vfio_device_container_unpin_pages(struct vfio_device *device,
				       dma_addr_t iova, int npage)
{
	struct vfio_container *container = device->group->container;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
		return;

	/* Not every backend implements unpin_pages (vfio-noiommu does not). */
	if (WARN_ON(!driver || !driver->ops->unpin_pages))
		return;

	driver->ops->unpin_pages(container->iommu_data, iova, npage);
}
2022-11-10 18:57:01 -08:00
/*
 * Forward a DMA read/write request for @device to its container's IOMMU
 * backend.
 *
 * Returns -ENOTTY when no backend is set or the backend has no dma_rw
 * callback, and otherwise whatever the backend's dma_rw() returns.
 */
int vfio_device_container_dma_rw(struct vfio_device *device,
				 dma_addr_t iova, void *data,
				 size_t len, bool write)
{
	struct vfio_container *container = device->group->container;
	struct vfio_iommu_driver *drv = container->iommu_driver;

	if (unlikely(!drv))
		return -ENOTTY;
	if (unlikely(!drv->ops->dma_rw))
		return -ENOTTY;

	return drv->ops->dma_rw(container->iommu_data, iova, data, len,
				write);
}
/*
 * Set up the backend registry, register the container misc device and,
 * when CONFIG_VFIO_NOIOMMU is enabled, register the vfio-noiommu backend.
 *
 * Returns 0 on success or the error from misc_register() /
 * vfio_register_iommu_driver().  If the backend registration fails, the
 * misc device is deregistered again.
 */
int __init vfio_container_init(void)
{
	int ret;

	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err(" vfio: misc device register failed \n ");
		return ret;
	}

	if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
		return 0;

	ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
	if (ret)
		misc_deregister(&vfio_dev);

	return ret;
}
/*
 * Tear down what vfio_container_init() set up, in reverse order: the
 * vfio-noiommu backend (if configured), the misc device, then the registry
 * mutex.
 */
void vfio_container_cleanup(void)
{
	if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
		vfio_unregister_iommu_driver(&vfio_noiommu_ops);

	misc_deregister(&vfio_dev);

	mutex_destroy(&vfio.iommu_drivers_lock);
}
2022-11-29 16:31:53 -04:00
/*
 * Module aliases for the container character device: one keyed by the misc
 * minor number (VFIO_MINOR) and one keyed by the device node name.
 * NOTE(review): presumably these allow the module to be loaded on demand
 * when the device node is first accessed - confirm against the module
 * alias documentation.
 */
MODULE_ALIAS_MISCDEV ( VFIO_MINOR ) ;
MODULE_ALIAS ( " devname:vfio/vfio " ) ;