2021-09-24 17:56:59 +02:00
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */
2022-09-22 16:20:19 -03:00
# ifndef __VFIO_VFIO_H__
# define __VFIO_VFIO_H__
2022-11-25 03:26:42 -08:00
# include <linux/file.h>
2022-09-22 16:20:19 -03:00
# include <linux/device.h>
# include <linux/cdev.h>
# include <linux/module.h>
2023-01-18 13:50:28 -04:00
# include <linux/vfio.h>
2022-09-22 16:20:19 -03:00
2022-11-29 16:31:50 -04:00
struct iommufd_ctx ;
2022-09-22 16:20:19 -03:00
struct iommu_group ;
2022-09-22 16:20:26 -03:00
struct vfio_container ;
2021-09-24 17:56:59 +02:00
2023-07-18 06:55:26 -07:00
struct vfio_device_file {
struct vfio_device * device ;
2023-07-18 06:55:34 -07:00
struct vfio_group * group ;
2023-07-18 06:55:28 -07:00
2023-07-18 06:55:32 -07:00
u8 access_granted ;
2023-07-18 06:55:37 -07:00
u32 devid ; /* only valid when iommufd is valid */
2023-07-18 06:55:28 -07:00
spinlock_t kvm_ref_lock ; /* protect kvm field */
struct kvm * kvm ;
2023-07-18 06:55:31 -07:00
struct iommufd_ctx * iommufd ; /* protected by struct vfio_device_set::lock */
2023-07-18 06:55:26 -07:00
} ;
2022-11-25 03:26:42 -08:00
/*
 * Core device / device-file entry points.
 * Declarations only; the definitions are not visible in this header.
 */
void vfio_device_put_registration(struct vfio_device *device);
bool vfio_device_try_get_registration(struct vfio_device *device);

int vfio_df_open(struct vfio_device_file *df);
void vfio_df_close(struct vfio_device_file *df);
struct vfio_device_file *
vfio_allocate_device_file(struct vfio_device *device);

extern const struct file_operations vfio_device_fops;
2023-07-18 06:55:50 -07:00
/*
 * vfio_noiommu is a real (externally defined) variable only when
 * CONFIG_VFIO_NOIOMMU is set; otherwise it is an enum constant equal to
 * false, so tests on it are compile-time constants.
 */
#ifdef CONFIG_VFIO_NOIOMMU
extern bool vfio_noiommu __read_mostly;
#else
enum { vfio_noiommu = false };
#endif
2021-09-24 17:57:01 +02:00
/* Kind of IOMMU backing (if any) behind a vfio group. */
enum vfio_group_type {
	/* Physical device that has IOMMU backing. */
	VFIO_IOMMU,

	/*
	 * Virtual device that has no IOMMU backing. The iommu_group sysfs
	 * interface is part of the userspace ABI, so the VFIO core fakes up
	 * an iommu_group for it. The user of such a device must not be able
	 * to directly trigger unmediated DMA.
	 */
	VFIO_EMULATED_IOMMU,

	/*
	 * Physical device that has no IOMMU backing. The iommu_group sysfs
	 * interface is part of the userspace ABI, so the VFIO core fakes up
	 * an iommu_group for it. Users can trigger unmediated DMA by the
	 * device; usage is highly dangerous, requires an explicit opt-in
	 * and will taint the kernel.
	 */
	VFIO_NO_IOMMU,
};
2023-07-18 06:55:50 -07:00
# if IS_ENABLED(CONFIG_VFIO_GROUP)
2022-09-22 16:20:26 -03:00
struct vfio_group {
struct device dev ;
struct cdev cdev ;
vfio: Follow a strict lifetime for struct iommu_group
The iommu_group comes from the struct device that a driver has been bound
to and then created a struct vfio_device against. To keep the iommu layer
sane we want to have a simple rule that only an attached driver should be
using the iommu API. Particularly only an attached driver should hold
ownership.
In VFIO's case since it uses the group APIs and it shares between
different drivers it is a bit more complicated, but the principle still
holds.
Solve this by waiting for all users of the vfio_group to stop before
allowing vfio_unregister_group_dev() to complete. This is done with a new
completion to know when the users go away and an additional refcount to
keep track of how many device drivers are sharing the vfio group. The last
driver to be unregistered will clean up the group.
This solves crashes in the S390 iommu driver that come because VFIO ends
up racing releasing ownership (which attaches the default iommu_domain to
the device) with the removal of that same device from the iommu
driver. This is a side case that iommu drivers should not have to cope
with.
iommu driver failed to attach the default/blocking domain
WARNING: CPU: 0 PID: 5082 at drivers/iommu/iommu.c:1961 iommu_detach_group+0x6c/0x80
Modules linked in: macvtap macvlan tap vfio_pci vfio_pci_core irqbypass vfio_virqfd kvm nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables nfnetlink mlx5_ib sunrpc ib_uverbs ism smc uvdevice ib_core s390_trng eadm_sch tape_3590 tape tape_class vfio_ccw mdev vfio_iommu_type1 vfio zcrypt_cex4 sch_fq_codel configfs ghash_s390 prng chacha_s390 libchacha aes_s390 mlx5_core des_s390 libdes sha3_512_s390 nvme sha3_256_s390 sha512_s390 sha256_s390 sha1_s390 sha_common nvme_core zfcp scsi_transport_fc pkey zcrypt rng_core autofs4
CPU: 0 PID: 5082 Comm: qemu-system-s39 Tainted: G W 6.0.0-rc3 #5
Hardware name: IBM 3931 A01 782 (LPAR)
Krnl PSW : 0704c00180000000 000000095bb10d28 (iommu_detach_group+0x70/0x80)
R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3
Krnl GPRS: 0000000000000001 0000000900000027 0000000000000039 000000095c97ffe0
00000000fffeffff 00000009fc290000 00000000af1fda50 00000000af590b58
00000000af1fdaf0 0000000135c7a320 0000000135e52258 0000000135e52200
00000000a29e8000 00000000af590b40 000000095bb10d24 0000038004b13c98
Krnl Code: 000000095bb10d18: c020003d56fc larl %r2,000000095c2bbb10
000000095bb10d1e: c0e50019d901 brasl %r14,000000095be4bf20
#000000095bb10d24: af000000 mc 0,0
>000000095bb10d28: b904002a lgr %r2,%r10
000000095bb10d2c: ebaff0a00004 lmg %r10,%r15,160(%r15)
000000095bb10d32: c0f4001aa867 brcl 15,000000095be65e00
000000095bb10d38: c004002168e0 brcl 0,000000095bf3def8
000000095bb10d3e: eb6ff0480024 stmg %r6,%r15,72(%r15)
Call Trace:
[<000000095bb10d28>] iommu_detach_group+0x70/0x80
([<000000095bb10d24>] iommu_detach_group+0x6c/0x80)
[<000003ff80243b0e>] vfio_iommu_type1_detach_group+0x136/0x6c8 [vfio_iommu_type1]
[<000003ff80137780>] __vfio_group_unset_container+0x58/0x158 [vfio]
[<000003ff80138a16>] vfio_group_fops_unl_ioctl+0x1b6/0x210 [vfio]
pci 0004:00:00.0: Removing from iommu group 4
[<000000095b5b62e8>] __s390x_sys_ioctl+0xc0/0x100
[<000000095be5d3b4>] __do_syscall+0x1d4/0x200
[<000000095be6c072>] system_call+0x82/0xb0
Last Breaking-Event-Address:
[<000000095be4bf80>] __warn_printk+0x60/0x68
It indicates that domain->ops->attach_dev() failed because the driver has
already passed the point of destructing the device.
Fixes: 9ac8545199a1 ("iommu: Fix use-after-free in iommu_release_device")
Reported-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/0-v2-a3c5f4429e2a+55-iommu_group_lifetime_jgg@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2022-09-22 21:06:10 -03:00
/*
* When drivers is non - zero a driver is attached to the struct device
* that provided the iommu_group and thus the iommu_group is a valid
* pointer . When drivers is 0 the driver is being detached . Once users
* reaches 0 then the iommu_group is invalid .
*/
refcount_t drivers ;
2022-09-22 16:20:26 -03:00
unsigned int container_users ;
struct iommu_group * iommu_group ;
struct vfio_container * container ;
struct list_head device_list ;
struct mutex device_lock ;
struct list_head vfio_next ;
2022-11-29 16:31:54 -04:00
# if IS_ENABLED(CONFIG_VFIO_CONTAINER)
2022-09-22 16:20:26 -03:00
struct list_head container_next ;
2022-11-29 16:31:54 -04:00
# endif
2022-09-22 16:20:26 -03:00
enum vfio_group_type type ;
2022-09-29 11:59:25 -03:00
struct mutex group_lock ;
2022-09-22 16:20:26 -03:00
struct kvm * kvm ;
struct file * opened_file ;
struct blocking_notifier_head notifier ;
2022-11-29 16:31:50 -04:00
struct iommufd_ctx * iommufd ;
2023-02-03 16:50:26 -05:00
spinlock_t kvm_ref_lock ;
2023-07-18 06:55:33 -07:00
unsigned int cdev_device_open_cnt ;
2022-09-22 16:20:26 -03:00
} ;
2023-07-18 06:55:33 -07:00
/*
 * Group interface, available when CONFIG_VFIO_GROUP is enabled.
 * Declarations only; the definitions are not visible in this header.
 */
int vfio_device_block_group(struct vfio_device *device);
void vfio_device_unblock_group(struct vfio_device *device);

int vfio_device_set_group(struct vfio_device *device,
			  enum vfio_group_type type);
void vfio_device_remove_group(struct vfio_device *device);
void vfio_device_group_register(struct vfio_device *device);
void vfio_device_group_unregister(struct vfio_device *device);
int vfio_device_group_use_iommu(struct vfio_device *device);
void vfio_device_group_unuse_iommu(struct vfio_device *device);
void vfio_df_group_close(struct vfio_device_file *df);

struct vfio_group *vfio_group_from_file(struct file *file);
bool vfio_group_enforced_coherent(struct vfio_group *group);
void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm);
bool vfio_device_has_container(struct vfio_device *device);

int __init vfio_group_init(void);
void vfio_group_cleanup(void);
2023-01-18 13:50:28 -04:00
static inline bool vfio_device_is_noiommu ( struct vfio_device * vdev )
{
return IS_ENABLED ( CONFIG_VFIO_NOIOMMU ) & &
vdev - > group - > type = = VFIO_NO_IOMMU ;
}
2023-07-18 06:55:50 -07:00
# else
struct vfio_group ;
/* Stub for builds without CONFIG_VFIO_GROUP: does nothing, returns 0. */
static inline int vfio_device_block_group(struct vfio_device *device)
{
	return 0;
}
/* Stub for builds without CONFIG_VFIO_GROUP: intentionally empty. */
static inline void vfio_device_unblock_group(struct vfio_device *device)
{
}
/*
 * Stub for builds without CONFIG_VFIO_GROUP: both arguments are ignored and
 * 0 is returned.
 */
static inline int vfio_device_set_group(struct vfio_device *device,
					enum vfio_group_type type)
{
	return 0;
}
/* Stub for builds without CONFIG_VFIO_GROUP: intentionally empty. */
static inline void vfio_device_remove_group(struct vfio_device *device)
{
}
/* Stub for builds without CONFIG_VFIO_GROUP: intentionally empty. */
static inline void vfio_device_group_register(struct vfio_device *device)
{
}
/* Stub for builds without CONFIG_VFIO_GROUP: intentionally empty. */
static inline void vfio_device_group_unregister(struct vfio_device *device)
{
}
/*
 * Stub for builds without CONFIG_VFIO_GROUP: always fails with
 * -EOPNOTSUPP.
 */
static inline int vfio_device_group_use_iommu(struct vfio_device *device)
{
	return -EOPNOTSUPP;
}
/* Stub for builds without CONFIG_VFIO_GROUP: intentionally empty. */
static inline void vfio_device_group_unuse_iommu(struct vfio_device *device)
{
}
/* Stub for builds without CONFIG_VFIO_GROUP: intentionally empty. */
static inline void vfio_df_group_close(struct vfio_device_file *df)
{
}
/*
 * Stub for builds without CONFIG_VFIO_GROUP: @file is ignored and NULL is
 * always returned.
 */
static inline struct vfio_group *vfio_group_from_file(struct file *file)
{
	return NULL;
}
/*
 * Stub for builds without CONFIG_VFIO_GROUP: @group is ignored and true is
 * always returned.
 */
static inline bool vfio_group_enforced_coherent(struct vfio_group *group)
{
	return true;
}
/* Stub for builds without CONFIG_VFIO_GROUP: intentionally empty. */
static inline void vfio_group_set_kvm(struct vfio_group *group,
				      struct kvm *kvm)
{
}
/*
 * Stub for builds without CONFIG_VFIO_GROUP: @device is ignored and false
 * is always returned.
 */
static inline bool vfio_device_has_container(struct vfio_device *device)
{
	return false;
}
/* Stub for builds without CONFIG_VFIO_GROUP: nothing to set up, returns 0. */
static inline int __init vfio_group_init(void)
{
	return 0;
}
/* Stub for builds without CONFIG_VFIO_GROUP: intentionally empty. */
static inline void vfio_group_cleanup(void)
{
}
/*
 * Stub for builds without CONFIG_VFIO_GROUP: @vdev is ignored and false is
 * always returned.
 */
static inline bool vfio_device_is_noiommu(struct vfio_device *vdev)
{
	return false;
}
# endif /* CONFIG_VFIO_GROUP */
2023-01-18 13:50:28 -04:00
2022-11-29 16:31:54 -04:00
# if IS_ENABLED(CONFIG_VFIO_CONTAINER)
2021-09-24 17:56:59 +02:00
/**
* struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
*/
struct vfio_iommu_driver_ops {
char * name ;
struct module * owner ;
void * ( * open ) ( unsigned long arg ) ;
void ( * release ) ( void * iommu_data ) ;
long ( * ioctl ) ( void * iommu_data , unsigned int cmd ,
unsigned long arg ) ;
int ( * attach_group ) ( void * iommu_data ,
2021-09-24 17:57:01 +02:00
struct iommu_group * group ,
enum vfio_group_type ) ;
2021-09-24 17:56:59 +02:00
void ( * detach_group ) ( void * iommu_data ,
struct iommu_group * group ) ;
int ( * pin_pages ) ( void * iommu_data ,
struct iommu_group * group ,
2022-07-22 19:02:51 -07:00
dma_addr_t user_iova ,
2021-09-24 17:56:59 +02:00
int npage , int prot ,
2022-07-22 19:02:56 -07:00
struct page * * pages ) ;
2022-07-22 19:02:47 -07:00
void ( * unpin_pages ) ( void * iommu_data ,
2022-07-22 19:02:51 -07:00
dma_addr_t user_iova , int npage ) ;
2022-07-19 21:02:49 -03:00
void ( * register_device ) ( void * iommu_data ,
struct vfio_device * vdev ) ;
void ( * unregister_device ) ( void * iommu_data ,
struct vfio_device * vdev ) ;
2021-09-24 17:56:59 +02:00
int ( * dma_rw ) ( void * iommu_data , dma_addr_t user_iova ,
void * data , size_t count , bool write ) ;
struct iommu_domain * ( * group_iommu_domain ) ( void * iommu_data ,
struct iommu_group * group ) ;
} ;
2022-09-22 16:20:26 -03:00
/*
 * Pairs a callback table with a list node (presumably the node on the list
 * of registered IOMMU drivers - confirm at vfio_register_iommu_driver()).
 */
struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;
	struct list_head			vfio_next;
};
2021-09-24 17:56:59 +02:00
int vfio_register_iommu_driver ( const struct vfio_iommu_driver_ops * ops ) ;
void vfio_unregister_iommu_driver ( const struct vfio_iommu_driver_ops * ops ) ;
2022-09-22 16:20:19 -03:00
2022-09-22 16:20:26 -03:00
struct vfio_container * vfio_container_from_file ( struct file * filep ) ;
2022-11-29 16:31:48 -04:00
int vfio_group_use_container ( struct vfio_group * group ) ;
void vfio_group_unuse_container ( struct vfio_group * group ) ;
2022-09-22 16:20:26 -03:00
int vfio_container_attach_group ( struct vfio_container * container ,
struct vfio_group * group ) ;
void vfio_group_detach_container ( struct vfio_group * group ) ;
void vfio_device_container_register ( struct vfio_device * device ) ;
void vfio_device_container_unregister ( struct vfio_device * device ) ;
2022-11-10 18:57:01 -08:00
int vfio_device_container_pin_pages ( struct vfio_device * device ,
dma_addr_t iova , int npage ,
int prot , struct page * * pages ) ;
void vfio_device_container_unpin_pages ( struct vfio_device * device ,
dma_addr_t iova , int npage ) ;
int vfio_device_container_dma_rw ( struct vfio_device * device ,
dma_addr_t iova , void * data ,
size_t len , bool write ) ;
2022-11-29 16:31:52 -04:00
2022-09-22 16:20:26 -03:00
int __init vfio_container_init ( void ) ;
void vfio_container_cleanup ( void ) ;
2022-11-29 16:31:54 -04:00
# else
/*
 * Stub for builds without CONFIG_VFIO_CONTAINER: @filep is ignored and NULL
 * is always returned.
 */
static inline struct vfio_container *
vfio_container_from_file(struct file *filep)
{
	return NULL;
}
/*
 * Stub for builds without CONFIG_VFIO_CONTAINER: always fails with
 * -EOPNOTSUPP.
 */
static inline int vfio_group_use_container(struct vfio_group *group)
{
	return -EOPNOTSUPP;
}
/* Stub for builds without CONFIG_VFIO_CONTAINER: intentionally empty. */
static inline void vfio_group_unuse_container(struct vfio_group *group)
{
}
/*
 * Stub for builds without CONFIG_VFIO_CONTAINER: both arguments are ignored
 * and -EOPNOTSUPP is returned.
 */
static inline int
vfio_container_attach_group(struct vfio_container *container,
			    struct vfio_group *group)
{
	return -EOPNOTSUPP;
}
/* Stub for builds without CONFIG_VFIO_CONTAINER: intentionally empty. */
static inline void vfio_group_detach_container(struct vfio_group *group)
{
}
/* Stub for builds without CONFIG_VFIO_CONTAINER: intentionally empty. */
static inline void vfio_device_container_register(struct vfio_device *device)
{
}
/* Stub for builds without CONFIG_VFIO_CONTAINER: intentionally empty. */
static inline void
vfio_device_container_unregister(struct vfio_device *device)
{
}
2022-11-10 18:57:01 -08:00
/*
 * Stub for builds without CONFIG_VFIO_CONTAINER: all arguments are ignored
 * and -EOPNOTSUPP is returned.
 */
static inline int vfio_device_container_pin_pages(struct vfio_device *device,
						  dma_addr_t iova, int npage,
						  int prot, struct page **pages)
{
	return -EOPNOTSUPP;
}
2022-11-10 18:57:01 -08:00
/* Stub for builds without CONFIG_VFIO_CONTAINER: intentionally empty. */
static inline void vfio_device_container_unpin_pages(struct vfio_device *device,
						     dma_addr_t iova, int npage)
{
}
2022-11-10 18:57:01 -08:00
/*
 * Stub for builds without CONFIG_VFIO_CONTAINER: all arguments are ignored
 * and -EOPNOTSUPP is returned.
 */
static inline int vfio_device_container_dma_rw(struct vfio_device *device,
					       dma_addr_t iova, void *data,
					       size_t len, bool write)
{
	return -EOPNOTSUPP;
}
/*
 * Stub for builds without CONFIG_VFIO_CONTAINER: nothing to set up,
 * returns 0.
 */
static inline int vfio_container_init(void)
{
	return 0;
}
/* Stub for builds without CONFIG_VFIO_CONTAINER: intentionally empty. */
static inline void vfio_container_cleanup(void)
{
}
# endif
2022-09-22 16:20:26 -03:00
2022-11-29 16:31:51 -04:00
# if IS_ENABLED(CONFIG_IOMMUFD)
2023-07-18 06:55:35 -07:00
/*
 * iommufd interface, available when CONFIG_IOMMUFD is enabled.
 * Declarations only; the definitions are not visible in this header.
 */
bool vfio_iommufd_device_has_compat_ioas(struct vfio_device *vdev,
					 struct iommufd_ctx *ictx);
int vfio_df_iommufd_bind(struct vfio_device_file *df);
void vfio_df_iommufd_unbind(struct vfio_device_file *df);
int vfio_iommufd_compat_attach_ioas(struct vfio_device *device,
				    struct iommufd_ctx *ictx);
2022-11-29 16:31:51 -04:00
# else
2023-07-18 06:55:35 -07:00
/*
 * Stub for builds without CONFIG_IOMMUFD: both arguments are ignored and
 * false is always returned.
 */
static inline bool
vfio_iommufd_device_has_compat_ioas(struct vfio_device *vdev,
				    struct iommufd_ctx *ictx)
{
	return false;
}
2023-07-18 06:55:37 -07:00
/*
 * Stub for builds without CONFIG_IOMMUFD: @df is ignored and -EOPNOTSUPP is
 * returned. The parameter is named @df to match the real prototype.
 */
static inline int vfio_df_iommufd_bind(struct vfio_device_file *df)
{
	return -EOPNOTSUPP;
}
2023-07-18 06:55:37 -07:00
/* Stub for builds without CONFIG_IOMMUFD: intentionally empty. */
static inline void vfio_df_iommufd_unbind(struct vfio_device_file *df)
{
}
2023-07-18 06:55:36 -07:00
/*
 * Stub for builds without CONFIG_IOMMUFD: both arguments are ignored and
 * -EOPNOTSUPP is returned.
 */
static inline int
vfio_iommufd_compat_attach_ioas(struct vfio_device *device,
				struct iommufd_ctx *ictx)
{
	return -EOPNOTSUPP;
}
2022-11-29 16:31:51 -04:00
# endif
2023-07-18 06:55:48 -07:00
/*
 * Attach/detach ioctl handlers operating on a device file and a userspace
 * argument pointer. Declared unconditionally; the definitions are not
 * visible in this header.
 */
int vfio_df_ioctl_attach_pt(struct vfio_device_file *df,
			    struct vfio_device_attach_iommufd_pt __user *arg);
int vfio_df_ioctl_detach_pt(struct vfio_device_file *df,
			    struct vfio_device_detach_iommufd_pt __user *arg);
2023-07-18 06:55:43 -07:00
# if IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV)
/* Real implementation lives elsewhere; built with CONFIG_VFIO_DEVICE_CDEV. */
void vfio_init_device_cdev(struct vfio_device *device);
static inline int vfio_device_add ( struct vfio_device * device )
{
/* cdev does not support noiommu device */
if ( vfio_device_is_noiommu ( device ) )
return device_add ( & device - > device ) ;
vfio_init_device_cdev ( device ) ;
return cdev_device_add ( & device - > cdev , & device - > device ) ;
}
static inline void vfio_device_del ( struct vfio_device * device )
{
if ( vfio_device_is_noiommu ( device ) )
device_del ( & device - > device ) ;
else
cdev_device_del ( & device - > cdev , & device - > device ) ;
}
int vfio_device_fops_cdev_open ( struct inode * inode , struct file * filep ) ;
2023-07-18 06:55:47 -07:00
long vfio_df_ioctl_bind_iommufd ( struct vfio_device_file * df ,
struct vfio_device_bind_iommufd __user * arg ) ;
void vfio_df_unbind_iommufd ( struct vfio_device_file * df ) ;
2023-07-18 06:55:43 -07:00
int vfio_cdev_init ( struct class * device_class ) ;
void vfio_cdev_cleanup ( void ) ;
# else
/* Stub for builds without CONFIG_VFIO_DEVICE_CDEV: intentionally empty. */
static inline void vfio_init_device_cdev(struct vfio_device *device)
{
}
static inline int vfio_device_add ( struct vfio_device * device )
{
return device_add ( & device - > device ) ;
}
static inline void vfio_device_del ( struct vfio_device * device )
{
device_del ( & device - > device ) ;
}
/*
 * Stub for builds without CONFIG_VFIO_DEVICE_CDEV: both arguments are
 * ignored and 0 is returned.
 */
static inline int
vfio_device_fops_cdev_open(struct inode *inode, struct file *filep)
{
	return 0;
}
2023-07-18 06:55:47 -07:00
/*
 * Stub for builds without CONFIG_VFIO_DEVICE_CDEV: both arguments are
 * ignored and -ENOTTY is returned.
 */
static inline long
vfio_df_ioctl_bind_iommufd(struct vfio_device_file *df,
			   struct vfio_device_bind_iommufd __user *arg)
{
	return -ENOTTY;
}
/* Stub for builds without CONFIG_VFIO_DEVICE_CDEV: intentionally empty. */
static inline void vfio_df_unbind_iommufd(struct vfio_device_file *df)
{
}
2023-07-18 06:55:43 -07:00
/*
 * Stub for builds without CONFIG_VFIO_DEVICE_CDEV: @device_class is ignored
 * and 0 is returned.
 */
static inline int vfio_cdev_init(struct class *device_class)
{
	return 0;
}
/* Stub for builds without CONFIG_VFIO_DEVICE_CDEV: intentionally empty. */
static inline void vfio_cdev_cleanup(void)
{
}
# endif /* CONFIG_VFIO_DEVICE_CDEV */
2022-12-05 11:29:20 -04:00
# if IS_ENABLED(CONFIG_VFIO_VIRQFD)
/*
 * virqfd setup/teardown, available when CONFIG_VFIO_VIRQFD is enabled.
 * Declarations only; the definitions are not visible in this header.
 */
int __init vfio_virqfd_init(void);
void vfio_virqfd_exit(void);
# else
/* Stub for builds without CONFIG_VFIO_VIRQFD: nothing to set up, returns 0. */
static inline int __init vfio_virqfd_init(void)
{
	return 0;
}
/* Stub for builds without CONFIG_VFIO_VIRQFD: intentionally empty. */
static inline void vfio_virqfd_exit(void)
{
}
# endif
2024-01-04 15:15:43 -05:00
# if IS_ENABLED(CONFIG_KVM)
2023-07-18 06:55:44 -07:00
/*
 * KVM reference helpers, available when CONFIG_KVM is enabled.
 * Declarations only; the definitions are not visible in this header.
 */
void vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm);
void vfio_device_put_kvm(struct vfio_device *device);
# else
2023-07-18 06:55:44 -07:00
/* Stub for builds without CONFIG_KVM: intentionally empty. */
static inline void vfio_device_get_kvm_safe(struct vfio_device *device,
					    struct kvm *kvm)
{
}
/* Stub for builds without CONFIG_KVM: intentionally empty. */
static inline void vfio_device_put_kvm(struct vfio_device *device)
{
}
# endif
2023-11-06 15:22:23 +08:00
# ifdef CONFIG_VFIO_DEBUGFS
/*
 * debugfs hooks, available when CONFIG_VFIO_DEBUGFS is defined.
 * Declarations only; the definitions are not visible in this header.
 */
void vfio_debugfs_create_root(void);
void vfio_debugfs_remove_root(void);
void vfio_device_debugfs_init(struct vfio_device *vdev);
void vfio_device_debugfs_exit(struct vfio_device *vdev);
# else
/* Stub for builds without CONFIG_VFIO_DEBUGFS: intentionally empty. */
static inline void vfio_debugfs_create_root(void)
{
}
/* Stub for builds without CONFIG_VFIO_DEBUGFS: intentionally empty. */
static inline void vfio_debugfs_remove_root(void)
{
}
/* Stub for builds without CONFIG_VFIO_DEBUGFS: intentionally empty. */
static inline void vfio_device_debugfs_init(struct vfio_device *vdev)
{
}
/* Stub for builds without CONFIG_VFIO_DEBUGFS: intentionally empty. */
static inline void vfio_device_debugfs_exit(struct vfio_device *vdev)
{
}
# endif /* CONFIG_VFIO_DEBUGFS */
2022-09-22 16:20:19 -03:00
# endif